Skip to content
Snippets Groups Projects
Commit c41468aa authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

old version of iact_pair_direct_sorted is definitely slower, killing it.

parent 3184d4a6
Branches
No related tags found
No related merge requests found
......@@ -1100,232 +1100,6 @@ static inline void iact_pair_direct_unsorted(struct cell *ci, struct cell *cj) {
* @param ci The #cell containing the particles.
* @param cj The #cell containing the other particles
*/
static inline void iact_pair_direct_sorted_old(struct cell *ci, struct cell *cj) {
int i, j, k, l;
int count_i, count_j;
struct part *parts_i, *parts_j;
double cjh = cj->h;
double xi[3];
float dx[3], ai[3], mi, mj, r2, w, ir;
#ifdef SANITY_CHECKS
/* Bad stuff will happen if cell sizes are different */
if (ci->h != cj->h)
error("Non matching cell sizes !! h_i=%f h_j=%f\n", ci->h, cj->h);
#endif
/* Get the sorted indices and stuff. */
struct index *ind_i, *ind_j;
double com[4][3] = {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0},
{0.0, 0.0, 0.0}};
float com_mass[4] = {0.0, 0.0, 0.0, 0.0};
float orth1[4], orth2[4];
int num_orth_planes = 0;
get_axis(&ci, &cj, &ind_i, &ind_j, &num_orth_planes, orth1, orth2);
count_i = ci->count;
parts_i = ci->parts;
count_j = cj->count;
parts_j = cj->parts;
cjh = cj->h;
#if ICHECK >= 0
for (k = 0; k < count_i; k++)
if (parts_i[k].id == ICHECK)
message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and "
"loc=[%f,%f,%f], h=%f.",
ci->loc[0], ci->loc[1], ci->loc[2], ci->h, cj->loc[0], cj->loc[1],
cj->loc[2], cj->h);
for (k = 0; k < count_j; k++)
if (parts_j[k].id == ICHECK)
message("[DEBUG] interacting cells loc=[%f,%f,%f], h=%f and "
"loc=[%f,%f,%f], h=%f.",
cj->loc[0], cj->loc[1], cj->loc[2], cj->h, ci->loc[0], ci->loc[1],
ci->loc[2], ci->h);
#endif
/* Distance along the axis as of which we will use a multipole. */
float d_max = dist_cutoff_ratio * cjh;
/* Loop over all particles in ci... */
for (i = count_i - 1; i >= 0; i--) {
/* Get the sorted index. */
int pid = ind_i[i].ind;
float di = ind_i[i].d;
/* Init the ith particle's data. */
for (k = 0; k < 3; k++) {
xi[k] = parts_i[pid].x[k];
ai[k] = 0.0;
}
mi = parts_i[pid].mass;
/* Loop over every following particle within d_max along the axis. */
for (j = 0; j < count_j && (ind_j[j].d - di) < d_max; j++) {
/* Get the sorted index. */
int pjd = ind_j[j].ind;
/* Compute the pairwise distance. */
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = xi[k] - parts_j[pjd].x[k];
r2 += dx[k] * dx[k];
}
/* Apply the gravitational acceleration. */
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
mj = parts_j[pjd].mass;
for (k = 0; k < 3; k++) {
float wdx = w * dx[k];
parts_j[pjd].a[k] += wdx * mi;
ai[k] -= wdx * mj;
}
if (parts_i[i].id == ICHECK)
message("Interaction with part %d - d= %f", parts_j[j].id, sqrt(r2));
#ifdef COUNTERS
++count_direct_sorted_pp;
#endif
#if ICHECK >= 0 && 0
if (parts_i[pid].id == ICHECK)
printf("[NEW] Interaction with particle id= %d (pair i)\n",
parts_j[pjd].id);
if (parts_j[j].id == ICHECK)
printf("[NEW] Interaction with particle id= %d (pair j) h_i= %f h_j= "
"%f ci= %p cj= %p count_i= %d count_j= %d d_i= %d d_j= %d\n",
parts_i[pid].id, ci->h, cj->h, ci, cj, count_i, count_j, ci->res,
cj->res);
#endif
} /* loop over every other particle. */
/* Add any remaining particles to the COM. */
for (int jj = j; jj < count_j; jj++) {
int pjd = ind_j[jj].ind;
mj = parts_j[pjd].mass;
l = 0;
#ifdef MANY_MULTIPOLES
if (num_orth_planes > 0) {
float n1 = parts_j[pjd].x[0] * orth1[0] + parts_j[pjd].x[1] * orth1[1] +
parts_j[pjd].x[2] * orth1[2] + orth1[3];
l = 2 * l + ((n1 < 0.0) ? 0 : 1);
if (num_orth_planes > 1) {
float n2 =
parts_j[pjd].x[0] * orth2[0] + parts_j[pjd].x[1] * orth2[1] +
parts_j[pjd].x[2] * orth2[2] + orth2[3];
l = 2 * l + ((n2 < 0.0) ? 0 : 1);
}
}
#endif
com[l][0] += mj * parts_j[pjd].x[0];
com[l][1] += mj * parts_j[pjd].x[1];
com[l][2] += mj * parts_j[pjd].x[2];
com_mass[l] += mj;
}
/* Shrink count_j to the latest valid particle. */
count_j = j;
/* Interact part_i with the center of mass. */
for (l = 0; l < 4; ++l) {
if (com_mass[l] > 0.0) {
float icom_mass = 1.0f / com_mass[l];
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = xi[k] - com[l][k] * icom_mass;
r2 += dx[k] * dx[k];
}
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
for (k = 0; k < 3; k++) ai[k] -= w * dx[k] * com_mass[l];
if (parts_i[i].id == ICHECK)
message("Interaction with multipole"); //, parts_j[j].id );
#ifdef COUNTERS
++count_direct_sorted_pm_i;
#endif
}
}
/* Store the accumulated acceleration on the ith part. */
for (k = 0; k < 3; k++) parts_i[pid].a[k] += ai[k];
} /* loop over all particles in ci. */
/* Loop over the particles in cj, catch the COM interactions. */
count_j = cj->count;
int last_i = 0;
for (l = 0; l < 4; ++l) {
com[l][0] = 0.0;
com[l][1] = 0.0;
com[l][2] = 0.0;
com_mass[l] = 0.0f;
}
for (j = 0; j < count_j; j++) {
/* Get the sorted index. */
int pjd = ind_j[j].ind;
float dj = ind_j[j].d;
/* Fill the COM with any new particles. */
for (i = last_i; i < count_i && (dj - ind_i[i].d) > d_max; i++) {
int pid = ind_i[i].ind;
mi = parts_i[pid].mass;
l = 0;
#ifdef MANY_MULTIPOLES
if (num_orth_planes > 0) {
float n1 = parts_i[pid].x[0] * orth1[0] + parts_i[pid].x[1] * orth1[1] +
parts_i[pid].x[2] * orth1[2] + orth1[3];
l = 2 * l + ((n1 < 0) ? 0 : 1);
if (num_orth_planes > 1) {
float n2 =
parts_i[pid].x[0] * orth2[0] + parts_i[pid].x[1] * orth2[1] +
parts_i[pid].x[2] * orth2[2] + orth2[3];
l = 2 * l + ((n2 < 0) ? 0 : 1);
}
}
#endif
com[l][0] += parts_i[pid].x[0] * mi;
com[l][1] += parts_i[pid].x[1] * mi;
com[l][2] += parts_i[pid].x[2] * mi;
com_mass[l] += mi;
}
/* Set the new last_i to the last particle checked. */
last_i = i;
/* Interact part_j with the COM. */
for (l = 0; l < 4; ++l) {
if (com_mass[l] > 0.0) {
float icom_mass = 1.0f / com_mass[l];
for (r2 = 0.0, k = 0; k < 3; k++) {
dx[k] = com[l][k] * icom_mass - parts_j[pjd].x[k];
r2 += dx[k] * dx[k];
}
ir = 1.0f / sqrtf(r2);
w = const_G * ir * ir * ir;
for (k = 0; k < 3; k++) parts_j[pjd].a[k] += w * dx[k] * com_mass[l];
#ifdef COUNTERS
++count_direct_sorted_pm_j;
#endif
}
}
}
}
static inline void iact_pair_direct_sorted(struct cell *ci, struct cell *cj) {
struct part_local {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment