diff --git a/src/runner.c b/src/runner.c index 97191a7430335bb56c4114c4d208378ed585544a..75c3723473cd391c8631435e5a320c356f119340 100644 --- a/src/runner.c +++ b/src/runner.c @@ -57,46 +57,20 @@ #include "timestep.h" /* Orientation of the cell pairs */ -const float runner_shift[13 * 3] = { - 5.773502691896258e-01, - 5.773502691896258e-01, - 5.773502691896258e-01, - 7.071067811865475e-01, - 7.071067811865475e-01, - 0.0, - 5.773502691896258e-01, - 5.773502691896258e-01, - -5.773502691896258e-01, - 7.071067811865475e-01, - 0.0, - 7.071067811865475e-01, - 1.0, - 0.0, - 0.0, - 7.071067811865475e-01, - 0.0, - -7.071067811865475e-01, - 5.773502691896258e-01, - -5.773502691896258e-01, - 5.773502691896258e-01, - 7.071067811865475e-01, - -7.071067811865475e-01, - 0.0, - 5.773502691896258e-01, - -5.773502691896258e-01, - -5.773502691896258e-01, - 0.0, - 7.071067811865475e-01, - 7.071067811865475e-01, - 0.0, - 1.0, - 0.0, - 0.0, - 7.071067811865475e-01, - -7.071067811865475e-01, - 0.0, - 0.0, - 1.0, +const double runner_shift[13][3] = { + {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01}, + {7.071067811865475e-01, 7.071067811865475e-01, 0.0}, + {5.773502691896258e-01, 5.773502691896258e-01, -5.773502691896258e-01}, + {7.071067811865475e-01, 0.0, 7.071067811865475e-01}, + {1.0, 0.0, 0.0}, + {7.071067811865475e-01, 0.0, -7.071067811865475e-01}, + {5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01}, + {7.071067811865475e-01, -7.071067811865475e-01, 0.0}, + {5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01}, + {0.0, 7.071067811865475e-01, 7.071067811865475e-01}, + {0.0, 1.0, 0.0}, + {0.0, 7.071067811865475e-01, -7.071067811865475e-01}, + {0.0, 0.0, 1.0}, }; /* Does the axis need flipping ? */ @@ -255,8 +229,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct entry *sort; int j, k, count = c->count; int i, ind, off[8], inds[8], temp_i, missing; - // float shift[3]; - float buff[8], px[3]; + float buff[8]; + double px[3]; TIMER_TIC @@ -360,9 +334,9 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { for (j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + k].i = k; - sort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] + - px[1] * runner_shift[3 * j + 1] + - px[2] * runner_shift[3 * j + 2]; + sort[j * (count + 1) + k].d = px[0] * runner_shift[j][0] + + px[1] * runner_shift[j][1] + + px[2] * runner_shift[j][2]; } } @@ -392,151 +366,6 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { if (clock) TIMER_TOC(timer_dosort); } -void runner_do_gsort(struct runner *r, struct cell *c, int flags, int clock) { - - struct entry *finger; - struct entry *fingers[8]; - struct gpart *gparts = c->gparts; - struct entry *gsort; - int j, k, count = c->gcount; - int i, ind, off[8], inds[8], temp_i, missing; - // float shift[3]; - float buff[8], px[3]; - - TIMER_TIC - - /* Clean-up the flags, i.e. filter out what's already been sorted. */ - flags &= ~c->gsorted; - if (flags == 0) return; - - /* start by allocating the entry arrays. */ - if (c->gsort == NULL || c->gsortsize < count) { - if (c->gsort != NULL) free(c->gsort); - c->gsortsize = count * 1.1; - if ((c->gsort = (struct entry *)malloc(sizeof(struct entry) * - (c->gsortsize + 1) * 13)) == NULL) - error("Failed to allocate sort memory."); - } - gsort = c->gsort; - - /* Does this cell have any progeny? */ - if (c->split) { - - /* Fill in the gaps within the progeny. */ - for (k = 0; k < 8; k++) { - if (c->progeny[k] == NULL) continue; - missing = flags & ~c->progeny[k]->gsorted; - if (missing) runner_do_gsort(r, c->progeny[k], missing, 0); - } - - /* Loop over the 13 different sort arrays. */ - for (j = 0; j < 13; j++) { - - /* Has this sort array been flagged? */ - if (!(flags & (1 << j))) continue; - - /* Init the particle index offsets. */ - for (off[0] = 0, k = 1; k < 8; k++) - if (c->progeny[k - 1] != NULL) - off[k] = off[k - 1] + c->progeny[k - 1]->gcount; - else - off[k] = off[k - 1]; - - /* Init the entries and indices. */ - for (k = 0; k < 8; k++) { - inds[k] = k; - if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) { - fingers[k] = &c->progeny[k]->gsort[j * (c->progeny[k]->gcount + 1)]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } else - buff[k] = FLT_MAX; - } - - /* Sort the buffer. */ - for (i = 0; i < 7; i++) - for (k = i + 1; k < 8; k++) - if (buff[inds[k]] < buff[inds[i]]) { - temp_i = inds[i]; - inds[i] = inds[k]; - inds[k] = temp_i; - } - - /* For each entry in the new sort list. */ - finger = &gsort[j * (count + 1)]; - for (ind = 0; ind < count; ind++) { - - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; - - /* Find the smallest entry. */ - for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - temp_i = inds[k - 1]; - inds[k - 1] = inds[k]; - inds[k] = temp_i; - } - - } /* Merge. */ - - /* Add a sentinel. */ - gsort[j * (count + 1) + count].d = FLT_MAX; - gsort[j * (count + 1) + count].i = 0; - - /* Mark as sorted. */ - c->gsorted |= (1 << j); - - } /* loop over sort arrays. */ - - } /* progeny? */ - - /* Otherwise, just sort. */ - else { - - /* Fill the sort array. */ - for (k = 0; k < count; k++) { - px[0] = gparts[k].x[0]; - px[1] = gparts[k].x[1]; - px[2] = gparts[k].x[2]; - for (j = 0; j < 13; j++) - if (flags & (1 << j)) { - gsort[j * (count + 1) + k].i = k; - gsort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] + - px[1] * runner_shift[3 * j + 1] + - px[2] * runner_shift[3 * j + 2]; - } - } - - /* Add the sentinel and sort. */ - for (j = 0; j < 13; j++) - if (flags & (1 << j)) { - gsort[j * (count + 1) + count].d = FLT_MAX; - gsort[j * (count + 1) + count].i = 0; - runner_do_sort_ascending(&gsort[j * (count + 1)], count); - c->gsorted |= (1 << j); - } - } - - /* Verify the sorting. */ - /* for ( j = 0 ; j < 13 ; j++ ) { - if ( !( flags & (1 << j) ) ) - continue; - finger = &c->gsort[ j*(count + 1) ]; - for ( k = 1 ; k < count ; k++ ) { - if ( finger[k].d < finger[k-1].d ) - error( "Sorting failed, ascending array." ); - if ( finger[k].i < 0 || finger[k].i >= count ) - error( "Sorting failed, indices borked." ); - } - } */ - - if (clock) TIMER_TOC(timer_dosort); -} - /** * @brief Initialize the particles before the density calculation * diff --git a/src/runner.h b/src/runner.h index 35e5f56a7b94145eec656b02a1c5568ec39352b6..758b6cf57dbc1a46b6fc068a23615f3b28c8707e 100644 --- a/src/runner.h +++ b/src/runner.h @@ -27,7 +27,7 @@ #include "cell.h" #include "inline.h" -extern const float runner_shift[13 * 3]; +extern const double runner_shift[13][3]; extern const char runner_flip[27]; /* A struct representing a runner's thread and its data. */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 97e22138b6c09750c1737741f58e61744a891c98..e0c13e4e0adb101d9a8975651f24225551277f43 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -376,9 +376,8 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; - di = hi * kernel_gamma + dxj + pix[0] * runner_shift[3 * sid + 0] + - pix[1] * runner_shift[3 * sid + 1] + - pix[2] * runner_shift[3 * sid + 2]; + di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] + + pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; /* Loop over the parts in cj. */ for (pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { @@ -439,9 +438,8 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; hi = pi->h; hig2 = hi * hi * kernel_gamma2; - di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[3 * sid + 0] + - pix[1] * runner_shift[3 * sid + 1] + - pix[2] * runner_shift[3 * sid + 2]; + di = -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] + + pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2]; /* Loop over the parts in cj. */ for (pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) { @@ -758,7 +756,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Get the cutoff shift. */ for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[3 * sid + k]; + rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ sort_i = &ci->sort[sid * (ci->count + 1)]; @@ -952,7 +950,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Get the cutoff shift. */ for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[3 * sid + k]; + rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ sort_i = &ci->sort[sid * (ci->count + 1)]; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 75775c24d1d35eaef2cde66a08c4f13561ef0ae4..ee9f2ba3233e022cb8c959ef84f30ed72438e1f3 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -60,8 +60,8 @@ void runner_dopair_grav_new(struct runner *r, struct cell *ci, sid = space_getsid(e->s, &ci, &cj, shift); /* Make sure the cells are sorted. */ - runner_do_gsort(r, ci, (1 << sid), 0); - runner_do_gsort(r, cj, (1 << sid), 0); + //runner_do_gsort(r, ci, (1 << sid), 0); + //runner_do_gsort(r, cj, (1 << sid), 0); /* Have the cells been sorted? */ if (!(ci->gsorted & (1 << sid)) || !(cj->gsorted & (1 << sid))) @@ -69,7 +69,7 @@ void runner_dopair_grav_new(struct runner *r, struct cell *ci, /* Get the cutoff shift. */ for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[3 * sid + k]; + rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ sort_i = &ci->gsort[sid * (ci->count + 1)];