Skip to content
Snippets Groups Projects
runner_doiact.h 103.50 KiB

/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Includes. */
#include "cell.h"
#include "part.h"

/* Before including this file, define FUNCTION, which is the
   name of the interaction function. This creates the interaction functions
   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
   and runner_dosub_FUNCTION calling the pairwise interaction function
   runner_iact_FUNCTION. */

#define PASTE(x, y) x##_##y

#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)

#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)

#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)

#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive, f)
#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION)

#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f)
#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)

#define _DOSELF1(f) PASTE(runner_doself1, f)
#define DOSELF1 _DOSELF1(FUNCTION)

#define _DOSELF2(f) PASTE(runner_doself2, f)
#define DOSELF2 _DOSELF2(FUNCTION)

#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)

#define _DOSUB1(f) PASTE(runner_dosub1, f)
#define DOSUB1 _DOSUB1(FUNCTION)

#define _DOSUB2(f) PASTE(runner_dosub2, f)
#define DOSUB2 _DOSUB2(FUNCTION)

#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)

#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)

#define _IACT(f) PASTE(runner_iact, f)
#define IACT _IACT(FUNCTION)

#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)

#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)

#define _TIMER_DOSUB(f) PASTE(timer_dosub, f)
#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)

#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

/**
 * @brief Compute the interactions between a cell pair.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */

void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
                  struct cell *restrict cj) {

  struct engine *e = r->e;
  int pid, pjd, k, count_i = ci->count, count_j = cj->count;
  double shift[3] = {0.0, 0.0, 0.0};
  struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
  struct part *restrict pi, *restrict pj;
  double pix[3];
  float dx[3], hi, hig2, r2;
  float t_current = e->time;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* Anything to do here? */
  if (ci->t_end_min > t_current && cj->t_end_min > t_current) return;

  /* Get the relative distance between the pairs, wrapping. */
  for (k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
  %i/%i parts and shift = [ %g %g %g ].\n" ,
      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
  cj->loc[2] ,
      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
  tic = getticks(); */

  /* Loop over the parts in ci. */
  for (pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    pi = &parts_i[pid];
    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

#ifndef VECTORIZE

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf(
      "runner_dopair_naive[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) "
      "took %.3f ms.\n",
      r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max,
      ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOPAIR);
#endif
}

void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {

  int pid, pjd, k, count = c->count;
  struct part *restrict parts = c->parts;
  struct part *restrict pi, *restrict pj;
  double pix[3] = {0.0, 0.0, 0.0};
  float dx[3], hi, hig2, r2;
  float t_current = r->e->time;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* Anything to do here? */
  if (c->t_end_min > t_current) return;

  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
  %i/%i parts and shift = [ %g %g %g ].\n" ,
      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
  cj->loc[2] ,
      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
  tic = getticks(); */

  /* Loop over the parts in ci. */
  for (pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    pi = &parts[pid];
    pix[0] = pi->x[0];
    pix[1] = pi->x[1];
    pix[2] = pi->x[2];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (pjd = pid + 1; pjd < count; pjd++) {

      /* Get a pointer to the jth particle. */
      pj = &parts[pjd];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

#ifndef VECTORIZE

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf("runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n", r->id,
         count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOSELF);
#endif
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */

void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

  struct engine *e = r->e;
  int pid, pjd, sid, k, count_j = cj->count, flipped;
  double shift[3] = {0.0, 0.0, 0.0};
  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
  double pix[3];
  float dx[3], hi, hig2, r2, di, dxj;
  struct entry *sort_j;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* Get the relative distance between the pairs, wrapping. */
  for (k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
  for (sid = 0, k = 0; k < 3; k++)
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
  flipped = runner_flip[sid];
  sid = sortlistID[sid];

  /* Have the cells been sorted? */
  if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells.");

  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
  %i/%i parts and shift = [ %g %g %g ].\n" ,
      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
  cj->loc[2] ,
      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
  tic = getticks(); */

  /* Pick-out the sorted lists. */
  sort_j = &cj->sort[sid * (cj->count + 1)];
  dxj = cj->dx_max;

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
    for (pid = 0; pid < count; pid++) {

      /* Get a hold of the ith part in ci. */
      pi = &parts_i[ind[pid]];
      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      hi = pi->h;
      hig2 = hi * hi * kernel_gamma2;
      di = hi * kernel_gamma + dxj + pix[0] * runner_shift[3 * sid + 0] +
           pix[1] * runner_shift[3 * sid + 1] +
           pix[2] * runner_shift[3 * sid + 2];

      /* Loop over the parts in cj. */
      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts_j[sort_j[pjd].i];

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */

  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
    for (pid = 0; pid < count; pid++) {

      /* Get a hold of the ith part in ci. */
      pi = &parts_i[ind[pid]];
      for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      hi = pi->h;
      hig2 = hi * hi * kernel_gamma2;
      di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[3 * sid + 0] +
           pix[1] * runner_shift[3 * sid + 1] +
           pix[2] * runner_shift[3 * sid + 2];

      /* Loop over the parts in cj. */
      for (pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {

        /* Get a pointer to the jth particle. */
        pj = &parts_j[sort_j[pjd].i];

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */
  }

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf(
      "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) "
      "took %.3f ms.\n",
      r->id, count, count_j, ci->depth, ci->h_max, cj->h_max,
      ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000);
#else
  TIMER_TOC(timer_dopair_subset);
#endif
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */

void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

  struct engine *e = r->e;
  int pid, pjd, k, count_j = cj->count;
  double shift[3] = {0.0, 0.0, 0.0};
  struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
  double pix[3];
  float dx[3], hi, hig2, r2;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* Get the relative distance between the pairs, wrapping. */
  for (k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
  %i/%i parts and shift = [ %g %g %g ].\n" ,
      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
  cj->loc[2] ,
      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
  tic = getticks(); */

  /* Loop over the parts_i. */
  for (pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    pi = &parts_i[ind[pid]];
    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2) {

#ifndef VECTORIZE

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf(
      "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) "
      "took %.3f ms.\n",
      r->id, count, count_j, ci->depth, ci->h_max, cj->h_max,
      ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000);
#else
  TIMER_TOC(timer_dopair_subset);
#endif
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts The #part to interact.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */

void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

  int pid, pjd, k, count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
  struct part *restrict pi, *restrict pj;
  double pix[3] = {0.0, 0.0, 0.0};
  float dx[3], hi, hig2, r2;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with
  %i/%i parts and shift = [ %g %g %g ].\n" ,
      ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] ,
  cj->loc[2] ,
      ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
  tic = getticks(); */

  /* Loop over the parts in ci. */
  for (pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    pi = &parts[ind[pid]];
    pix[0] = pi->x[0];
    pix[1] = pi->x[1];
    pix[2] = pi->x[2];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (pjd = 0; pjd < count_i; pjd++) {

      /* Get a pointer to the jth particle. */
      pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 > 0.0f && r2 < hig2) {

#ifndef VECTORIZE

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf("runner_doself_subset[%02i]: %i/%i parts at depth %i took %.3f ms.\n",
         r->id, count, ci->count, ci->depth,
         ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000);
#else
  TIMER_TOC(timer_dopair_subset);
#endif
}

/**
 * @brief Compute the interactions between a cell pair.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */

void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {

  struct engine *restrict e = r->e;
  int pid, pjd, k, sid;
  double rshift, shift[3] = {0.0, 0.0, 0.0};
  struct entry *restrict sort_i, *restrict sort_j;
  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
  double pix[3], pjx[3], di, dj;
  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
  double hi_max, hj_max;
  double di_max, dj_min;
  int count_i, count_j;
  float t_current = e->time;
#ifdef VECTORIZE
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC

  /* Anything to do here? */
  if (ci->t_end_min > t_current && cj->t_end_min > t_current)
    return;

  /* Get the sort ID. */
  sid = space_getsid(e->s, &ci, &cj, shift);

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
    error("Trying to interact unsorted cells.");

  /* Get the cutoff shift. */
  for (rshift = 0.0, k = 0; k < 3; k++)
    rshift += shift[k] * runner_shift[3 * sid + k];

  /* Pick-out the sorted lists. */
  sort_i = &ci->sort[sid * (ci->count + 1)];
  sort_j = &cj->sort[sid * (cj->count + 1)];

  /* Get some other useful values. */
  hi_max = ci->h_max * kernel_gamma - rshift;
  hj_max = cj->h_max * kernel_gamma;
  count_i = ci->count;
  count_j = cj->count;
  parts_i = ci->parts;
  parts_j = cj->parts;
  di_max = sort_i[count_i - 1].d - rshift;
  dj_min = sort_j[0].d;
  dx_max = (ci->dx_max + cj->dx_max);

  /* Loop over the parts in ci. */
  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
       pid--) {

    /* Get a hold of the ith part in ci. */
    pi = &parts_i[sort_i[pid].i];
    if (pi->t_end > t_current) continue;
    hi = pi->h;
    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
    if (di < dj_min) continue;

    hig2 = hi * hi * kernel_gamma2;
    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];

    /* Loop over the parts in cj. */
    for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {

      /* Get a pointer to the jth particle. */
      pj = &parts_j[sort_j[pjd].i];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
      /* Hit or miss? */
      if (r2 < hig2) {

#ifndef VECTORIZE

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

  /* printf( "runner_dopair: first half took %.3f ms...\n" ,
  ((double)(getticks() - tic)) / CPU_TPS * 1000 );
  tic = getticks(); */

  /* Loop over the parts in cj. */
  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
       pjd++) {

    /* Get a hold of the jth part in cj. */
    pj = &parts_j[sort_j[pjd].i];
    if (pj->t_end > t_current) continue;
    hj = pj->h;
    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
    if (dj > di_max) continue;

    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
    hjg2 = hj * hj * kernel_gamma2;

    /* Loop over the parts in ci. */
    for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

      /* Get a pointer to the jth particle. */
      pi = &parts_i[sort_i[pid].i];

      /* Compute the pairwise distance. */
      r2 = 0.0f;
      for (k = 0; k < 3; k++) {
        dx[k] = pjx[k] - pi->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hjg2) {

#ifndef VECTORIZE

        IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hj;
        hjq[icount] = pi->h;
        piq[icount] = pj;
        pjq[icount] = pi;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount > 0)
    for (k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

#ifdef TIMER_VERBOSE
  printf(
      "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) "
      "took %.3f ms.\n",
      r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max,
      fmax(ci->h[0], fmax(ci->h[1], ci->h[2])),
      ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOPAIR);
#endif
}

void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {

  struct engine *restrict e = r->e;
  int pid, pjd, k, sid;
  double rshift, shift[3] = {0.0, 0.0, 0.0};
  struct entry *restrict sort_i, *restrict sort_j;
  struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
  int countdt_i = 0, countdt_j = 0;
  struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
  double pix[3], pjx[3], di, dj;
  float dx[3], hi, hig2, hj, hjg2, r2, dx_max;
  double hi_max, hj_max;
  double di_max, dj_min;
  int count_i, count_j;
  float t_current = e->time;
#ifdef VECTORIZE
  int icount1 = 0;
  float r2q1[VEC_SIZE] __attribute__((aligned(16)));
  float hiq1[VEC_SIZE] __attribute__((aligned(16)));
  float hjq1[VEC_SIZE] __attribute__((aligned(16)));
  float dxq1[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE];
  int icount2 = 0;
  float r2q2[VEC_SIZE] __attribute__((aligned(16)));
  float hiq2[VEC_SIZE] __attribute__((aligned(16)));
  float hjq2[VEC_SIZE] __attribute__((aligned(16)));
  float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
#endif
  TIMER_TIC

  /* Anything to do here? */
  if (ci->t_end_min > t_current && cj->t_end_min > t_current) return;

  /* Get the shift ID. */
  sid = space_getsid(e->s, &ci, &cj, shift);

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
    error("Trying to interact unsorted cells.");

  /* Get the cutoff shift. */
  for (rshift = 0.0, k = 0; k < 3; k++)
    rshift += shift[k] * runner_shift[3 * sid + k];

  /* Pick-out the sorted lists. */
  sort_i = &ci->sort[sid * (ci->count + 1)];
  sort_j = &cj->sort[sid * (cj->count + 1)];

  /* Get some other useful values. */
  hi_max = ci->h_max * kernel_gamma - rshift;
  hj_max = cj->h_max * kernel_gamma;
  count_i = ci->count;
  count_j = cj->count;
  parts_i = ci->parts;
  parts_j = cj->parts;
  di_max = sort_i[count_i - 1].d - rshift;
  dj_min = sort_j[0].d;
  dx_max = (ci->dx_max + cj->dx_max);

  /* Collect the number of parts left and right below dt. */
  if (ci->t_end_max <= t_current) {
    sortdt_i = sort_i;
    countdt_i = count_i;
  } else if (ci->t_end_min <= t_current) {
    if ((sortdt_i = (struct entry *)alloca(sizeof(struct entry) * count_i)) ==
        NULL)
      error("Failed to allocate dt sortlists.");
    for (k = 0; k < count_i; k++)
      if (parts_i[sort_i[k].i].t_end <= t_current) {
        sortdt_i[countdt_i] = sort_i[k];
        countdt_i += 1;
      }
  }
  if (cj->t_end_max <= t_current) {
    sortdt_j = sort_j;
    countdt_j = count_j;
  } else if (cj->t_end_min <= t_current) {
    if ((sortdt_j = (struct entry *)alloca(sizeof(struct entry) * count_j)) ==
        NULL)
      error("Failed to allocate dt sortlists.");
    for (k = 0; k < count_j; k++)
      if (parts_j[sort_j[k].i].t_end <= t_current) {
        sortdt_j[countdt_j] = sort_j[k];
        countdt_j += 1;
      }
  }

  /* Loop over the parts in ci. */
  for (pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min;
       pid--) {

    /* Get a hold of the ith part in ci. */
    pi = &parts_i[sort_i[pid].i];
    hi = pi->h;
    di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
    if (di < dj_min) continue;

    hig2 = hi * hi * kernel_gamma2;
    for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];

    /* Look at valid dt parts only? */
    if (pi->t_end > t_current) {

      /* Loop over the parts in cj within dt. */
      for (pjd = 0; pjd < countdt_j && sortdt_j[pjd].d < di; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts_j[sortdt_j[pjd].i];
        hj = pj->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pj->x[k] - pix[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hj, hi, pj, pi);

#else

          /* Add this interaction to the queue. */
          r2q1[icount1] = r2;
          dxq1[3 * icount1 + 0] = dx[0];
          dxq1[3 * icount1 + 1] = dx[1];
          dxq1[3 * icount1 + 2] = dx[2];
          hiq1[icount1] = hj;
          hjq1[icount1] = hi;
          piq1[icount1] = pj;
          pjq1[icount1] = pi;
          icount1 += 1;

          /* Flush? */
          if (icount1 == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
            icount1 = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    }

    /* Otherwise, look at all parts. */
    else {

      /* Loop over the parts in cj. */
      for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts_j[sort_j[pjd].i];
        hj = pj->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2) {

#ifndef VECTORIZE

          /* Does pj need to be updated too? */
          if (pj->t_end <= t_current)
            IACT(r2, dx, hi, hj, pi, pj);
          else
            IACT_NONSYM(r2, dx, hi, hj, pi, pj);

#else

          /* Does pj need to be updated too? */
          if (pj->t_end <= t_current) {

            /* Add this interaction to the symmetric queue. */
            r2q2[icount2] = r2;
            dxq2[3 * icount2 + 0] = dx[0];
            dxq2[3 * icount2 + 1] = dx[1];
            dxq2[3 * icount2 + 2] = dx[2];
            hiq2[icount2] = hi;
            hjq2[icount2] = hj;
            piq2[icount2] = pi;
            pjq2[icount2] = pj;
            icount2 += 1;

            /* Flush? */
            if (icount2 == VEC_SIZE) {
              IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2);
              icount2 = 0;
            }

          } else {

            /* Add this interaction to the non-symmetric queue. */
            r2q1[icount1] = r2;
            dxq1[3 * icount1 + 0] = dx[0];
            dxq1[3 * icount1 + 1] = dx[1];
            dxq1[3 * icount1 + 2] = dx[2];
            hiq1[icount1] = hi;
            hjq1[icount1] = hj;
            piq1[icount1] = pi;
            pjq1[icount1] = pj;
            icount1 += 1;

            /* Flush? */
            if (icount1 == VEC_SIZE) {
              IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
              icount1 = 0;
            }
          }

#endif
        }

      } /* loop over the parts in cj. */
    }

  } /* loop over the parts in ci. */

  /* printf( "runner_dopair: first half took %.3f ms...\n" ,
  ((double)(getticks() - tic)) / CPU_TPS * 1000 );
  tic = getticks(); */

  /* Loop over the parts in cj. */
  for (pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
       pjd++) {

    /* Get a hold of the jth part in cj. */
    pj = &parts_j[sort_j[pjd].i];
    hj = pj->h;
    dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
    if (dj > di_max) continue;

    for (k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
    hjg2 = hj * hj * kernel_gamma2;

    /* Is this particle outside the dt? */
    if (pj->t_end > t_current) {

      /* Loop over the parts in ci. */
      for (pid = countdt_i - 1; pid >= 0 && sortdt_i[pid].d > dj; pid--) {

        /* Get a pointer to the jth particle. */
        pi = &parts_i[sortdt_i[pid].i];
        hi = pi->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pi->x[k] - pjx[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hi, hj, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q1[icount1] = r2;
          dxq1[3 * icount1 + 0] = dx[0];
          dxq1[3 * icount1 + 1] = dx[1];
          dxq1[3 * icount1 + 2] = dx[2];
          hiq1[icount1] = hi;
          hjq1[icount1] = hj;
          piq1[icount1] = pi;
          pjq1[icount1] = pj;
          icount1 += 1;

          /* Flush? */
          if (icount1 == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
            icount1 = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */
    }

    /* Otherwise, interact with all particles in cj. */
    else {

      /* Loop over the parts in ci. */
      for (pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

        /* Get a pointer to the jth particle. */
        pi = &parts_i[sort_i[pid].i];
        hi = pi->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pjx[k] - pi->x[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hjg2 && r2 > hi * hi * kernel_gamma2) {

#ifndef VECTORIZE

          /* Does pi need to be updated too? */
          if (pi->t_end <= t_current)
            IACT(r2, dx, hj, hi, pj, pi);
          else
            IACT_NONSYM(r2, dx, hj, hi, pj, pi);

#else

          /* Does pi need to be updated too? */
          if (pi->dt <= dt_step) {

            /* Add this interaction to the symmetric queue. */
            r2q2[icount2] = r2;
            dxq2[3 * icount2 + 0] = dx[0];
            dxq2[3 * icount2 + 1] = dx[1];
            dxq2[3 * icount2 + 2] = dx[2];
            hiq2[icount2] = hj;
            hjq2[icount2] = hi;
            piq2[icount2] = pj;
            pjq2[icount2] = pi;
            icount2 += 1;

            /* Flush? */
            if (icount2 == VEC_SIZE) {
              IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2);
              icount2 = 0;
            }

          } else {

            /* Add this interaction to the non-symmetric queue. */
            r2q1[icount1] = r2;
            dxq1[3 * icount1 + 0] = dx[0];
            dxq1[3 * icount1 + 1] = dx[1];
            dxq1[3 * icount1 + 2] = dx[2];
            hiq1[icount1] = hj;
            hjq1[icount1] = hi;
            piq1[icount1] = pj;
            pjq1[icount1] = pi;
            icount1 += 1;

            /* Flush? */
            if (icount1 == VEC_SIZE) {
              IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
              icount1 = 0;
            }
          }

#endif
        }

      } /* loop over the parts in cj. */
    }

  } /* loop over the parts in ci. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount1 > 0)
    for (k = 0; k < icount1; k++)
      IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
  if (icount2 > 0)
    for (k = 0; k < icount2; k++)
      IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
#endif

#ifdef TIMER_VERBOSE
  printf(
      "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) "
      "took %.3f ms.\n",
      r->id, count_i, count_j, ci->depth, ci->h_max, cj->h_max,
      fmax(ci->h[0], fmax(ci->h[1], ci->h[2])),
      ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOPAIR);
#endif
}

/**
 * @brief Compute the cell self-interaction.
 *
 * @param r The #runner.
 * @param c The #cell.
 */

void DOSELF1(struct runner *r, struct cell *restrict c) {

  int k, pid, pjd, count = c->count;
  double pix[3];
  float dx[3], hi, hj, hig2, r2;
  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
  float t_current = r->e->time;
  int firstdt = 0, countdt = 0, *indt = NULL, doj;
#ifdef VECTORIZE
  int icount1 = 0;
  float r2q1[VEC_SIZE] __attribute__((aligned(16)));
  float hiq1[VEC_SIZE] __attribute__((aligned(16)));
  float hjq1[VEC_SIZE] __attribute__((aligned(16)));
  float dxq1[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE];
  int icount2 = 0;
  float r2q2[VEC_SIZE] __attribute__((aligned(16)));
  float hiq2[VEC_SIZE] __attribute__((aligned(16)));
  float hjq2[VEC_SIZE] __attribute__((aligned(16)));
  float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
#endif
  TIMER_TIC


  /* Set up indt if needed. */
  if (c->t_end_min > t_current)
    return;
  else if (c->t_end_max > t_current) {
    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
      error("Failed to allocate indt.");
    for (k = 0; k < count; k++)
      if (parts[k].t_end <= t_current) {
        indt[countdt] = k;
        countdt += 1;
      }
  }

  /* Loop over the particles in the cell. */
  for (pid = 0; pid < count; pid++) {

    /* Get a pointer to the ith particle. */
    pi = &parts[pid];

    /* Get the particle position and radius. */
    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Is the ith particle inactive? */
    if (pi->t_end > t_current) {

      /* Loop over the other particles .*/
      for (pjd = firstdt; pjd < countdt; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts[indt[pjd]];
        hj = pj->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pj->x[k] - pix[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hj * hj * kernel_gamma2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hj, hi, pj, pi);

#else

          /* Add this interaction to the queue. */
          r2q1[icount1] = r2;
          dxq1[3 * icount1 + 0] = dx[0];
          dxq1[3 * icount1 + 1] = dx[1];
          dxq1[3 * icount1 + 2] = dx[2];
          hiq1[icount1] = hj;
          hjq1[icount1] = hi;
          piq1[icount1] = pj;
          pjq1[icount1] = pi;
          icount1 += 1;

          /* Flush? */
          if (icount1 == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
            icount1 = 0;
          }

#endif
        }

      } /* loop over all other particles. */

    }

    /* Otherwise, interact with all candidates. */
    else {

      /* We caught a live one! */
      firstdt += 1;

      /* Loop over the other particles .*/
      for (pjd = pid + 1; pjd < count; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts[pjd];
        hj = pj->h;
        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }
        doj = (pj->t_end <= t_current) && (r2 < hj * hj * kernel_gamma2);

        /* Hit or miss? */
        if (r2 < hig2 || doj) {

#ifndef VECTORIZE

          /* Which parts need to be updated? */
          if (r2 < hig2 && doj)
            IACT(r2, dx, hi, hj, pi, pj);
          else if (!doj)
            IACT_NONSYM(r2, dx, hi, hj, pi, pj);
          else {
            dx[0] = -dx[0];
            dx[1] = -dx[1];
            dx[2] = -dx[2];
            IACT_NONSYM(r2, dx, hj, hi, pj, pi);
          }

#else

          /* Does pj need to be updated too? */
          if (r2 < hig2 && doj) {

            /* Add this interaction to the symmetric queue. */
            r2q2[icount2] = r2;
            dxq2[3 * icount2 + 0] = dx[0];
            dxq2[3 * icount2 + 1] = dx[1];
            dxq2[3 * icount2 + 2] = dx[2];
            hiq2[icount2] = hi;
            hjq2[icount2] = hj;
            piq2[icount2] = pi;
            pjq2[icount2] = pj;
            icount2 += 1;

            /* Flush? */
            if (icount2 == VEC_SIZE) {
              IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2);
              icount2 = 0;
            }

          } else if (!doj) {

            /* Add this interaction to the non-symmetric queue. */
            r2q1[icount1] = r2;
            dxq1[3 * icount1 + 0] = dx[0];
            dxq1[3 * icount1 + 1] = dx[1];
            dxq1[3 * icount1 + 2] = dx[2];
            hiq1[icount1] = hi;
            hjq1[icount1] = hj;
            piq1[icount1] = pi;
            pjq1[icount1] = pj;
            icount1 += 1;

            /* Flush? */
            if (icount1 == VEC_SIZE) {
              IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
              icount1 = 0;
            }

          } else {

            /* Add this interaction to the non-symmetric queue. */
            r2q1[icount1] = r2;
            dxq1[3 * icount1 + 0] = -dx[0];
            dxq1[3 * icount1 + 1] = -dx[1];
            dxq1[3 * icount1 + 2] = -dx[2];
            hiq1[icount1] = hj;
            hjq1[icount1] = hi;
            piq1[icount1] = pj;
            pjq1[icount1] = pi;
            icount1 += 1;

            /* Flush? */
            if (icount1 == VEC_SIZE) {
              IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
              icount1 = 0;
            }
          }

#endif
        }

      } /* loop over all other particles. */
    }

  } /* loop over all particles. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount1 > 0)
    for (k = 0; k < icount1; k++)
      IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
  if (icount2 > 0)
    for (k = 0; k < icount2; k++)
      IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
#endif

#ifdef TIMER_VERBOSE
  printf("runner_doself1[%02i]: %i parts at depth %i took %.3f ms.\n", r->id,
         count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOSELF);
#endif
}

void DOSELF2(struct runner *r, struct cell *restrict c) {

  int k, pid, pjd, count = c->count;
  double pix[3];
  float dx[3], hi, hj, hig2, r2;
  struct part *restrict parts = c->parts, *restrict pi, *restrict pj;
  float t_current = r->e->time;
  int firstdt = 0, countdt = 0, *indt = NULL;
#ifdef VECTORIZE
  int icount1 = 0;
  float r2q1[VEC_SIZE] __attribute__((aligned(16)));
  float hiq1[VEC_SIZE] __attribute__((aligned(16)));
  float hjq1[VEC_SIZE] __attribute__((aligned(16)));
  float dxq1[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE];
  int icount2 = 0;
  float r2q2[VEC_SIZE] __attribute__((aligned(16)));
  float hiq2[VEC_SIZE] __attribute__((aligned(16)));
  float hjq2[VEC_SIZE] __attribute__((aligned(16)));
  float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
#endif
  TIMER_TIC


  /* Set up indt if needed. */
  if (c->t_end_min > t_current)
    return;
  else if (c->t_end_max > t_current) {
    if ((indt = (int *)alloca(sizeof(int) * count)) == NULL)
      error("Failed to allocate indt.");
    for (k = 0; k < count; k++)
      if (parts[k].t_end <= t_current) {
        indt[countdt] = k;
        countdt += 1;
      }
  }

  /* Loop over the particles in the cell. */
  for (pid = 0; pid < count; pid++) {

    /* Get a pointer to the ith particle. */
    pi = &parts[pid];

    /* Get the particle position and radius. */
    for (k = 0; k < 3; k++) pix[k] = pi->x[k];
    hi = pi->h;
    hig2 = hi * hi * kernel_gamma2;

    /* Is the ith particle not active? */
    if (pi->t_end > t_current) {

      /* Loop over the other particles .*/
      for (pjd = firstdt; pjd < countdt; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts[indt[pjd]];
        hj = pj->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pj->x[k] - pix[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {

#ifndef VECTORIZE

          IACT_NONSYM(r2, dx, hj, hi, pj, pi);

#else

          /* Add this interaction to the queue. */
          r2q1[icount1] = r2;
          dxq1[3 * icount1 + 0] = dx[0];
          dxq1[3 * icount1 + 1] = dx[1];
          dxq1[3 * icount1 + 2] = dx[2];
          hiq1[icount1] = hj;
          hjq1[icount1] = hi;
          piq1[icount1] = pj;
          pjq1[icount1] = pi;
          icount1 += 1;

          /* Flush? */
          if (icount1 == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
            icount1 = 0;
          }

#endif
        }

      } /* loop over all other particles. */

    }

    /* Otherwise, interact with all candidates. */
    else {

      /* We caught a live one! */
      firstdt += 1;

      /* Loop over the other particles .*/
      for (pjd = pid + 1; pjd < count; pjd++) {

        /* Get a pointer to the jth particle. */
        pj = &parts[pjd];
        hj = pj->h;

        /* Compute the pairwise distance. */
        r2 = 0.0f;
        for (k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }

        /* Hit or miss? */
        if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) {

#ifndef VECTORIZE

          /* Does pj need to be updated too? */
          if (pj->t_end <= t_current)
            IACT(r2, dx, hi, hj, pi, pj);
          else
            IACT_NONSYM(r2, dx, hi, hj, pi, pj);

#else

          /* Does pj need to be updated too? */
          if (pj->t_end <= t_current) {

            /* Add this interaction to the symmetric queue. */
            r2q2[icount2] = r2;
            dxq2[3 * icount2 + 0] = dx[0];
            dxq2[3 * icount2 + 1] = dx[1];
            dxq2[3 * icount2 + 2] = dx[2];
            hiq2[icount2] = hi;
            hjq2[icount2] = hj;
            piq2[icount2] = pi;
            pjq2[icount2] = pj;
            icount2 += 1;

            /* Flush? */
            if (icount2 == VEC_SIZE) {
              IACT_VEC(r2q2, dxq2, hiq2, hjq2, piq2, pjq2);
              icount2 = 0;
            }

          } else {

            /* Add this interaction to the non-symmetric queue. */
            r2q1[icount1] = r2;
            dxq1[3 * icount1 + 0] = dx[0];
            dxq1[3 * icount1 + 1] = dx[1];
            dxq1[3 * icount1 + 2] = dx[2];
            hiq1[icount1] = hi;
            hjq1[icount1] = hj;
            piq1[icount1] = pi;
            pjq1[icount1] = pj;
            icount1 += 1;

            /* Flush? */
            if (icount1 == VEC_SIZE) {
              IACT_NONSYM_VEC(r2q1, dxq1, hiq1, hjq1, piq1, pjq1);
              icount1 = 0;
            }
          }

#endif
        }

      } /* loop over all other particles. */
    }

  } /* loop over all particles. */

#ifdef VECTORIZE
  /* Pick up any leftovers. */
  if (icount1 > 0)
    for (k = 0; k < icount1; k++)
      IACT_NONSYM(r2q1[k], &dxq1[3 * k], hiq1[k], hjq1[k], piq1[k], pjq1[k]);
  if (icount2 > 0)
    for (k = 0; k < icount2; k++)
      IACT(r2q2[k], &dxq2[3 * k], hiq2[k], hjq2[k], piq2[k], pjq2[k]);
#endif

#ifdef TIMER_VERBOSE
  printf("runner_doself2[%02i]: %i parts at depth %i took %.3f ms.\n", r->id,
         count, c->depth, ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000);
#else
  TIMER_TOC(TIMER_DOSELF);
#endif
}

/**
 * @brief Compute grouped sub-cell interactions
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 * @param sid The direction linking the cells
 * @param gettimer Do we have a timer ?
 *
 * @todo Hard-code the sid on the recursive calls to avoid the
 * redundant computations to find the sid on-the-fly.
 */

void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid,
            int gettimer) {

  int j = 0, k;
  double shift[3];
  float h;
  struct space *s = r->e->s;
  float t_current = r->e->time;

  TIMER_TIC

  /* Is this a single cell? */
  if (cj == NULL) {

    /* Should we even bother? */
    if (ci->t_end_min > t_current) return;

    /* Recurse? */
    if (ci->split) {

      /* Loop over all progeny. */
      for (k = 0; k < 8; k++)
        if (ci->progeny[k] != NULL) {
          DOSUB1(r, ci->progeny[k], NULL, -1, 0);
          for (j = k + 1; j < 8; j++)
            if (ci->progeny[j] != NULL)
              DOSUB1(r, ci->progeny[k], ci->progeny[j], -1, 0);
        }

    }

    /* Otherwise, compute self-interaction. */
    else
      DOSELF1(r, ci);

  } /* self-interaction. */

  /* Otherwise, it's a pair interaction. */
  else {

    /* Should we even bother? */
    if (ci->t_end_min > t_current && cj->t_end_min > t_current) return;

    /* Get the cell dimensions. */
    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));

    /* Get the type of pair if not specified explicitly. */
    // if ( sid < 0 )
    sid = space_getsid(s, &ci, &cj, shift);

    /* Recurse? */
    if (ci->split && cj->split &&
        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
            h / 2) {

      /* Different types of flags. */
      switch (sid) {

        /* Regular sub-cell interactions of a single cell. */
        case 0: /* (  1 ,  1 ,  1 ) */
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          break;

        case 1: /* (  1 ,  1 ,  0 ) */
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
          break;

        case 2: /* (  1 ,  1 , -1 ) */
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          break;

        case 3: /* (  1 ,  0 ,  1 ) */
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
          break;

        case 4: /* (  1 ,  0 ,  0 ) */
          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[0], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[1], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[2], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[3], -1, 0);
          break;

        case 5: /* (  1 ,  0 , -1 ) */
          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0);
          break;

        case 6: /* (  1 , -1 ,  1 ) */
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
          break;

        case 7: /* (  1 , -1 ,  0 ) */
          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0);
          break;

        case 8: /* (  1 , -1 , -1 ) */
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0);
          break;

        case 9: /* (  0 ,  1 ,  1 ) */
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
          break;

        case 10: /* (  0 ,  1 ,  0 ) */
          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[0], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[4], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[1], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[5], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[4], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[5], -1, 0);
          break;

        case 11: /* (  0 ,  1 , -1 ) */
          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
            DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0);
          break;

        case 12: /* (  0 ,  0 ,  1 ) */
          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[1], cj->progeny[0], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[1], cj->progeny[2], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[1], cj->progeny[4], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
            DOSUB1(r, ci->progeny[1], cj->progeny[6], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[2], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
            DOSUB1(r, ci->progeny[3], cj->progeny[6], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[4], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
            DOSUB1(r, ci->progeny[5], cj->progeny[6], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
            DOSUB1(r, ci->progeny[7], cj->progeny[6], -1, 0);
          break;
      }

    }

    /* Otherwise, compute the pair directly. */
    else if (ci->t_end_min <= t_current || cj->t_end_min <= t_current) {

      /* Do any of the cells need to be sorted first? */
      if (!(ci->sorted & (1 << sid))) runner_dosort(r, ci, (1 << sid), 1);
      if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1);

      /* Compute the interactions. */
      DOPAIR1(r, ci, cj);
    }

  } /* otherwise, pair interaction. */

  if (gettimer)
#ifdef TIMER_VERBOSE
    printf("runner_dosub1[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id,
           sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000);
#else
    TIMER_TOC(TIMER_DOSUB);
#endif
}

void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
            int gettimer) {

  int j, k;
  double shift[3];
  float h;
  struct space *s = r->e->s;
  float t_current = r->e->time;

  TIMER_TIC

  /* Is this a single cell? */
  if (cj == NULL) {

    /* Should we even bother? */
    if (ci->t_end_min > t_current) return;

    /* Recurse? */
    if (ci->split) {

      /* Loop over all progeny. */
      for (k = 0; k < 8; k++)
        if (ci->progeny[k] != NULL) {
          DOSUB2(r, ci->progeny[k], NULL, -1, 0);
          for (j = k + 1; j < 8; j++)
            if (ci->progeny[j] != NULL)
              DOSUB2(r, ci->progeny[k], ci->progeny[j], -1, 0);
        }

    }

    /* Otherwise, compute self-interaction. */
    else
      DOSELF2(r, ci);

  } /* self-interaction. */

  /* Otherwise, it's a pair interaction. */
  else {

    /* Should we even bother? */
    if (ci->t_end_min > t_current && cj->t_end_min > t_current) return;

    /* Get the cell dimensions. */
    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));

    /* Get the type of pair if not specified explicitly. */
    // if ( sid < 0 )
    sid = space_getsid(s, &ci, &cj, shift);

    /* Recurse? */
    if (ci->split && cj->split &&
        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
            h / 2) {

      /* Different types of flags. */
      switch (sid) {

        /* Regular sub-cell interactions of a single cell. */
        case 0: /* (  1 ,  1 ,  1 ) */
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          break;

        case 1: /* (  1 ,  1 ,  0 ) */
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
          break;

        case 2: /* (  1 ,  1 , -1 ) */
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          break;

        case 3: /* (  1 ,  0 ,  1 ) */
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
          break;

        case 4: /* (  1 ,  0 ,  0 ) */
          if (ci->progeny[4] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[0], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[1], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[2], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[3], -1, 0);
          break;

        case 5: /* (  1 ,  0 , -1 ) */
          if (ci->progeny[4] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0);
          break;

        case 6: /* (  1 , -1 ,  1 ) */
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
          break;

        case 7: /* (  1 , -1 ,  0 ) */
          if (ci->progeny[4] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0);
          break;

        case 8: /* (  1 , -1 , -1 ) */
          if (ci->progeny[4] != NULL && cj->progeny[3] != NULL)
            DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0);
          break;

        case 9: /* (  0 ,  1 ,  1 ) */
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
          break;

        case 10: /* (  0 ,  1 ,  0 ) */
          if (ci->progeny[2] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[0], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[4], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[1], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[5], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[4], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[5], -1, 0);
          break;

        case 11: /* (  0 ,  1 , -1 ) */
          if (ci->progeny[2] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] != NULL)
            DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0);
          break;

        case 12: /* (  0 ,  0 ,  1 ) */
          if (ci->progeny[1] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[1], cj->progeny[0], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[1], cj->progeny[2], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[1], cj->progeny[4], -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[6] != NULL)
            DOSUB2(r, ci->progeny[1], cj->progeny[6], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[2], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[6] != NULL)
            DOSUB2(r, ci->progeny[3], cj->progeny[6], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[4], -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[6] != NULL)
            DOSUB2(r, ci->progeny[5], cj->progeny[6], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[6] != NULL)
            DOSUB2(r, ci->progeny[7], cj->progeny[6], -1, 0);
          break;
      }

    }

    /* Otherwise, compute the pair directly. */
    else if (ci->t_end_min <= t_current || cj->t_end_min <= t_current) {

      /* Do any of the cells need to be sorted first? */
      if (!(ci->sorted & (1 << sid))) runner_dosort(r, ci, (1 << sid), 1);
      if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1);

      /* Compute the interactions. */
      DOPAIR2(r, ci, cj);
    }

  } /* otherwise, pair interaction. */

  if (gettimer)
#ifdef TIMER_VERBOSE
    printf("runner_dosub2[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id,
           sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000);
#else
    TIMER_TOC(TIMER_DOSUB);
#endif
}

void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts,
                  int *ind, int count, struct cell *cj, int sid, int gettimer) {

  int j, k;
  double shift[3];
  float h;
  struct space *s = r->e->s;
  struct cell *sub = NULL;
  float t_current = r->e->time;

  TIMER_TIC

  /* Find out in which sub-cell of ci the parts are. */
  for (k = 0; k < 8; k++)
    if (ci->progeny[k] != NULL) {
      // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] &&
      //      parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] +
      // ci->progeny[k]->h[0] &&
      //      parts[ ind[ 0 ] ].x[1] >= ci->progeny[k]->loc[1] &&
      //      parts[ ind[ 0 ] ].x[1] <= ci->progeny[k]->loc[1] +
      // ci->progeny[k]->h[1] &&
      //      parts[ ind[ 0 ] ].x[2] >= ci->progeny[k]->loc[2] &&
      //      parts[ ind[ 0 ] ].x[2] <= ci->progeny[k]->loc[2] +
      // ci->progeny[k]->h[2] ) {
      if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] &&
          &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) {
        sub = ci->progeny[k];
        break;
      }
    }

  /* Is this a single cell? */
  if (cj == NULL) {

    /* Recurse? */
    if (ci->split) {
      /* Loop over all progeny. */
      DOSUB_SUBSET(r, sub, parts, ind, count, NULL, -1, 0);
      for (j = 0; j < 8; j++)
        if (ci->progeny[j] != sub && ci->progeny[j] != NULL)
          DOSUB_SUBSET(r, sub, parts, ind, count, ci->progeny[j], -1, 0);

    }

    /* Otherwise, compute self-interaction. */
    else
      DOSELF_SUBSET(r, ci, parts, ind, count);

  } /* self-interaction. */

  /* Otherwise, it's a pair interaction. */
  else {

    /* Get the cell dimensions. */
    h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2]));

    /* Recurse? */
    if (ci->split && cj->split &&
        fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max <
            h / 2) {

      /* Get the type of pair if not specified explicitly. */
      sid = space_getsid(s, &ci, &cj, shift);

      /* Different types of flags. */
      switch (sid) {

        /* Regular sub-cell interactions of a single cell. */
        case 0: /* (  1 ,  1 ,  1 ) */
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, ci->progeny[0], parts, ind, count, cj->progeny[7],
                         -1, 0);
          break;

        case 1: /* (  1 ,  1 ,  0 ) */
          if (ci->progeny[6] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;

        case 2: /* (  1 ,  1 , -1 ) */
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          break;

        case 3: /* (  1 ,  0 ,  1 ) */
          if (ci->progeny[5] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;

        case 4: /* (  1 ,  0 ,  0 ) */
          if (ci->progeny[4] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[4] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[4] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[4] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;

        case 5: /* (  1 ,  0 , -1 ) */
          if (ci->progeny[4] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[4] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[6],
                         -1, 0);
          break;

        case 6: /* (  1 , -1 ,  1 ) */
          if (ci->progeny[5] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[5],
                         -1, 0);
          break;

        case 7: /* (  1 , -1 ,  0 ) */
          if (ci->progeny[4] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[4] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[4],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[5],
                         -1, 0);
          break;

        case 8: /* (  1 , -1 , -1 ) */
          if (ci->progeny[4] == sub && cj->progeny[3] != NULL)
            DOSUB_SUBSET(r, ci->progeny[4], parts, ind, count, cj->progeny[3],
                         -1, 0);
          if (ci->progeny[4] != NULL && cj->progeny[3] == sub)
            DOSUB_SUBSET(r, cj->progeny[3], parts, ind, count, ci->progeny[4],
                         -1, 0);
          break;

        case 9: /* (  0 ,  1 ,  1 ) */
          if (ci->progeny[3] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;

        case 10: /* (  0 ,  1 ,  0 ) */
          if (ci->progeny[2] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[2] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[2] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[2] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;

        case 11: /* (  0 ,  1 , -1 ) */
          if (ci->progeny[2] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[2] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[2], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[2] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[2],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[1] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[1],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[1] == sub)
            DOSUB_SUBSET(r, cj->progeny[1], parts, ind, count, ci->progeny[6],
                         -1, 0);
          if (ci->progeny[6] == sub && cj->progeny[5] != NULL)
            DOSUB_SUBSET(r, ci->progeny[6], parts, ind, count, cj->progeny[5],
                         -1, 0);
          if (ci->progeny[6] != NULL && cj->progeny[5] == sub)
            DOSUB_SUBSET(r, cj->progeny[5], parts, ind, count, ci->progeny[6],
                         -1, 0);
          break;

        case 12: /* (  0 ,  0 ,  1 ) */
          if (ci->progeny[1] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[1], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[1],
                         -1, 0);
          if (ci->progeny[1] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[1], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[1],
                         -1, 0);
          if (ci->progeny[1] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[1], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[1],
                         -1, 0);
          if (ci->progeny[1] == sub && cj->progeny[6] != NULL)
            DOSUB_SUBSET(r, ci->progeny[1], parts, ind, count, cj->progeny[6],
                         -1, 0);
          if (ci->progeny[1] != NULL && cj->progeny[6] == sub)
            DOSUB_SUBSET(r, cj->progeny[6], parts, ind, count, ci->progeny[1],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[3] == sub && cj->progeny[6] != NULL)
            DOSUB_SUBSET(r, ci->progeny[3], parts, ind, count, cj->progeny[6],
                         -1, 0);
          if (ci->progeny[3] != NULL && cj->progeny[6] == sub)
            DOSUB_SUBSET(r, cj->progeny[6], parts, ind, count, ci->progeny[3],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[5] == sub && cj->progeny[6] != NULL)
            DOSUB_SUBSET(r, ci->progeny[5], parts, ind, count, cj->progeny[6],
                         -1, 0);
          if (ci->progeny[5] != NULL && cj->progeny[6] == sub)
            DOSUB_SUBSET(r, cj->progeny[6], parts, ind, count, ci->progeny[5],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[0] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[0],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[0] == sub)
            DOSUB_SUBSET(r, cj->progeny[0], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[2] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[2],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[2] == sub)
            DOSUB_SUBSET(r, cj->progeny[2], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[4] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[4],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[4] == sub)
            DOSUB_SUBSET(r, cj->progeny[4], parts, ind, count, ci->progeny[7],
                         -1, 0);
          if (ci->progeny[7] == sub && cj->progeny[6] != NULL)
            DOSUB_SUBSET(r, ci->progeny[7], parts, ind, count, cj->progeny[6],
                         -1, 0);
          if (ci->progeny[7] != NULL && cj->progeny[6] == sub)
            DOSUB_SUBSET(r, cj->progeny[6], parts, ind, count, ci->progeny[7],
                         -1, 0);
          break;
      }

    }

    /* Otherwise, compute the pair directly. */
    else if (ci->t_end_min <= t_current || cj->t_end_min <= t_current) {

      /* Get the relative distance between the pairs, wrapping. */
      for (k = 0; k < 3; k++) {
        if (cj->loc[k] - ci->loc[k] < -s->dim[k] / 2)
          shift[k] = s->dim[k];
        else if (cj->loc[k] - ci->loc[k] > s->dim[k] / 2)
          shift[k] = -s->dim[k];
      }

      /* Get the sorting index. */
      for (sid = 0, k = 0; k < 3; k++)
        sid =
            3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                           ? 0
                           : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);
      sid = sortlistID[sid];

      /* Do any of the cells need to be sorted first? */
      if (!(cj->sorted & (1 << sid))) runner_dosort(r, cj, (1 << sid), 1);

      /* Compute the interactions. */
      DOPAIR_SUBSET(r, ci, parts, ind, count, cj);
    }

  } /* otherwise, pair interaction. */

  if (gettimer)
#ifdef TIMER_VERBOSE
    printf("runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n", r->id,
           sid, ci->depth, ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000);
#else
    TIMER_TOC(TIMER_DOSUB);
#endif
}