diff --git a/examples/test_bh_new.c b/examples/test_bh_new.c index 0fabd158cbaaf8cd5e53a03e2967ab4d57103ef2..2533cff9a9994a75b4863ed119ad5702f7528482 100644 --- a/examples/test_bh_new.c +++ b/examples/test_bh_new.c @@ -38,13 +38,13 @@ /* Some local constants. */ #define cell_pool_grow 1000 #define cell_maxparts 128 -#define task_limit 1 +#define task_limit 1e8 #define const_G 1 // 6.6738e-8 #define dist_min 0.5 /* Used for legacy walk only */ #define dist_cutoff_ratio 1.5 #define ICHECK -1 -#define SANITY_CHECKS +#define NO_SANITY_CHECKS #define NO_COM_AS_TASK #define NO_COUNTERS #define QUADRUPOLES @@ -70,7 +70,12 @@ struct part_local { struct multipole { #ifdef QUADRUPOLES - double I[3][3]; + double I_xx; + double I_yy; + double I_zz; + double I_xy; + double I_xz; + double I_yz; #endif double com[3]; @@ -80,7 +85,12 @@ struct multipole { struct multipole_local { #ifdef QUADRUPOLES - float I[3][3]; + float I_xx; + float I_yy; + float I_zz; + float I_xy; + float I_xz; + float I_yz; #endif float com[3]; @@ -114,7 +124,7 @@ struct cell { int res, com_tid; struct index *indices; -};// __attribute__((aligned(256))); +} __attribute__((aligned(64))); /** Task types. */ enum task_type { @@ -182,10 +192,15 @@ void comp_com(struct cell *c) { int k, count = c->count; struct cell *cp; struct part *parts = c->parts; - double com[3] = {0.0, 0.0, 0.0}, mass = 0.0; + double com[3] = {0.0, 0.0, 0.0}, mass = 0.0, imass = 0.0; #ifdef QUADRUPOLES - double I[3][3] = {{0.0, 0.0, 0.0} , {0.0, 0.0, 0.0} , {0.0, 0.0, 0.0} }; + double I_xx = 0.; + double I_yy = 0.; + double I_zz = 0.; + double I_xy = 0.; + double I_xz = 0.; + double I_yz = 0.; #endif #ifdef SANITY_CHECKS @@ -204,26 +219,16 @@ void comp_com(struct cell *c) { mass += cp_mass; #ifdef QUADRUPOLES - I[0][0] += cp->new.I[0][0]; - I[0][1] += cp->new.I[0][1]; - I[0][2] += cp->new.I[0][2]; - I[1][0] += cp->new.I[1][0]; - I[1][1] += cp->new.I[1][1]; - I[1][2] += cp->new.I[1][2]; - I[2][0] += cp->new.I[2][0]; - I[2][1] += cp->new.I[2][1]; - I[2][2] += cp->new.I[2][2]; + I_xx += cp->new.I_xx; + I_yy += cp->new.I_yy; + I_zz += cp->new.I_zz; + I_xy += cp->new.I_xy; + I_xz += cp->new.I_xz; + I_yz += cp->new.I_yz; #endif } - /* Final operation for the COM */ - float imass = 1.0f / mass; - com[0] *= imass; - com[1] *= imass; - com[2] *= imass; - - /* Otherwise, collect the multipole from the particles. */ } else { @@ -235,43 +240,34 @@ void comp_com(struct cell *c) { mass += p_mass; #ifdef QUADRUPOLES - I[0][0] += parts[k].x[0] * parts[k].x[0] * p_mass; - I[0][1] += parts[k].x[0] * parts[k].x[1] * p_mass; - I[0][2] += parts[k].x[0] * parts[k].x[2] * p_mass; - I[1][0] += parts[k].x[1] * parts[k].x[0] * p_mass; - I[1][1] += parts[k].x[1] * parts[k].x[1] * p_mass; - I[1][2] += parts[k].x[1] * parts[k].x[2] * p_mass; - I[2][0] += parts[k].x[2] * parts[k].x[0] * p_mass; - I[2][1] += parts[k].x[2] * parts[k].x[1] * p_mass; - I[2][2] += parts[k].x[2] * parts[k].x[2] * p_mass; + I_xx += parts[k].x[0] * parts[k].x[0] * p_mass; + I_yy += parts[k].x[1] * parts[k].x[1] * p_mass; + I_zz += parts[k].x[2] * parts[k].x[2] * p_mass; + I_xy += parts[k].x[0] * parts[k].x[1] * p_mass; + I_xz += parts[k].x[0] * parts[k].x[2] * p_mass; + I_yz += parts[k].x[1] * parts[k].x[2] * p_mass; #endif } - /* Final operation for the COM */ - float imass = 1.0f / mass; - com[0] *= imass; - com[1] *= imass; - com[2] *= imass; } /* Store the multipole data */ + imass = 1.0f / mass; c->new.mass = mass; - c->new.com[0] = com[0]; - c->new.com[1] = com[1]; - c->new.com[2] = com[2]; + c->new.com[0] = com[0] * imass; + c->new.com[1] = com[1] * imass; + c->new.com[2] = com[2] * imass; #ifdef QUADRUPOLES - c->new.I[0][0] = I[0][0]; - c->new.I[0][1] = I[0][1]; - c->new.I[0][2] = I[0][2]; - c->new.I[1][0] = I[1][0]; - c->new.I[1][1] = I[1][1]; - c->new.I[1][2] = I[1][2]; - c->new.I[2][0] = I[2][0]; - c->new.I[2][1] = I[2][1]; - c->new.I[2][2] = I[2][2]; + /* Store the quadrupole data */ + c->new.I_xx = I_xx; + c->new.I_yy = I_yy; + c->new.I_zz = I_zz; + c->new.I_xy = I_xy; + c->new.I_xz = I_xz; + c->new.I_yz = I_yz; #endif } @@ -524,12 +520,13 @@ static inline void make_interact_pc(struct part_local *parts, int part_count, #ifndef QUADRUPOLES - parts[i].a[0] += mrinv3 * dx[0]; - parts[i].a[1] += mrinv3 * dx[1]; - parts[i].a[2] += mrinv3 * dx[2]; + parts[i].a[0] += mrinv3 * dx[0]; + parts[i].a[1] += mrinv3 * dx[1]; + parts[i].a[2] += mrinv3 * dx[2]; #else - + + /* Follows the notation in Bonsai */ float mrinv5 = mrinv3 * ir * ir; float mrinv7 = mrinv5 * ir * ir; @@ -537,10 +534,10 @@ static inline void make_interact_pc(struct part_local *parts, int part_count, float D2 = 3.f * mrinv5; float D3 = -15.f * mrinv7; - float q = mults[j].I[0][0] + mults[j].I[1][1] + mults[j].I[2][2]; - float qRx = mults[j].I[0][0]*dx[0] + mults[j].I[0][1]*dx[1] + mults[j].I[0][2]*dx[2]; - float qRy = mults[j].I[1][0]*dx[0] + mults[j].I[1][1]*dx[1] + mults[j].I[1][2]*dx[2]; - float qRz = mults[j].I[2][0]*dx[0] + mults[j].I[2][1]*dx[1] + mults[j].I[2][2]*dx[2]; + float q = mults[j].I_xx + mults[j].I_yy + mults[j].I_zz; + float qRx = mults[j].I_xx*dx[0] + mults[j].I_xy*dx[1] + mults[j].I_xz*dx[2]; + float qRy = mults[j].I_xy*dx[0] + mults[j].I_yy*dx[1] + mults[j].I_yz*dx[2]; + float qRz = mults[j].I_xz*dx[0] + mults[j].I_yz*dx[1] + mults[j].I_zz*dx[2]; float qRR = qRx * dx[0] + qRy * dx[1] + qRz * dx[2]; float C = D1 + 0.5f * D2 * q + 0.5f * D3 * qRR; @@ -548,11 +545,10 @@ static inline void make_interact_pc(struct part_local *parts, int part_count, parts[i].a[1] -= C * dx[1] + D2 * qRy; parts[i].a[2] -= C * dx[2] + D2 * qRz; - /* message(" %f %f %f", C * dx[0] + D2 * qRx, C * dx[1] + D2 * qRy, C * dx[2] + D2 * qRz); */ #endif - } /* loop over every multipoles. */ + } /* loop over every multipole. */ } /* loop over every particle. */ } @@ -615,15 +611,12 @@ static inline void iact_pair_pc(struct cell *ci, struct cell *cj) { /* Final operation to get the quadrupoles */ double imass = 1. / cps->new.mass; - mult_local[mult_count].I[0][0] = cps->new.I[0][0]*imass - cps->new.com[0] * cps->new.com[0]; - mult_local[mult_count].I[0][1] = cps->new.I[0][1]*imass - cps->new.com[0] * cps->new.com[1]; - mult_local[mult_count].I[0][2] = cps->new.I[0][2]*imass - cps->new.com[0] * cps->new.com[2]; - mult_local[mult_count].I[1][0] = cps->new.I[1][0]*imass - cps->new.com[1] * cps->new.com[0]; - mult_local[mult_count].I[1][1] = cps->new.I[1][1]*imass - cps->new.com[1] * cps->new.com[1]; - mult_local[mult_count].I[1][2] = cps->new.I[1][2]*imass - cps->new.com[1] * cps->new.com[2]; - mult_local[mult_count].I[2][0] = cps->new.I[2][0]*imass - cps->new.com[2] * cps->new.com[0]; - mult_local[mult_count].I[2][1] = cps->new.I[2][1]*imass - cps->new.com[2] * cps->new.com[1]; - mult_local[mult_count].I[2][2] = cps->new.I[2][2]*imass - cps->new.com[2] * cps->new.com[2]; + mult_local[mult_count].I_xx = cps->new.I_xx*imass - cps->new.com[0] * cps->new.com[0]; + mult_local[mult_count].I_xy = cps->new.I_xy*imass - cps->new.com[0] * cps->new.com[1]; + mult_local[mult_count].I_xz = cps->new.I_xz*imass - cps->new.com[0] * cps->new.com[2]; + mult_local[mult_count].I_yy = cps->new.I_yy*imass - cps->new.com[1] * cps->new.com[1]; + mult_local[mult_count].I_yz = cps->new.I_yz*imass - cps->new.com[1] * cps->new.com[2]; + mult_local[mult_count].I_zz = cps->new.I_zz*imass - cps->new.com[2] * cps->new.com[2]; #endif mult_count++; @@ -1281,14 +1274,13 @@ void interact_exact(int N, struct part *parts, int monitor) { */ void test_bh(int N, int nr_threads, int runs, char *fileName) { - int i,j, k; + int i, k; struct cell *root; struct part *parts; FILE *file; struct qsched s; ticks tic, toc_run, tot_setup = 0, tot_run = 0; int countMultipoles = 0, countPairs = 0, countCoMs = 0; - // double I[3][3] = {{0., 0., 0.},{0., 0., 0.},{0., 0., 0.}}; /* Runner function. */ void runner(int type, void * data) { @@ -1341,23 +1333,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { parts[k].a_legacy[1] = 0.0; parts[k].a_legacy[2] = 0.0; } - j++; - /* for (i = 0; i < 4; i++) { */ - /* for (j = 0; j < 4; j++) { */ - /* for (k = 0; k < 4; k++) { */ - /* int l = 16*i + 4*j + k; */ - /* parts[l].id = l; */ - /* parts[l].x[0] = 0.125 + 0.25 * i; */ - /* parts[l].x[1] = 0.125 + 0.25 * j; */ - /* parts[l].x[2] = 0.125 + 0.25 * k; */ - /* parts[l].mass = 1.; */ - /* parts[l].a_legacy[0] = 0.0; */ - /* parts[l].a_legacy[1] = 0.0; */ - /* parts[l].a_legacy[2] = 0.0; */ - - /* } */ - /* } */ - /* } */ /* Otherwise, read them from a file. */ } else { @@ -1401,7 +1376,7 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { } message("Average number of parts per leaf is %f.", ((double)N) / nr_leaves); -#if 1 //ICHECK > 0 +#if ICHECK > 0 printf("----------------------------------------------------------\n"); /* Do a N^2 interactions calculation */ @@ -1482,42 +1457,6 @@ void test_bh(int N, int nr_threads, int runs, char *fileName) { tot_run += toc_run - tic; } - /* for (k = 0; k < N; ++k) { */ - /* I[0][0] += parts[k].mass * parts[k].x[0] * parts[k].x[0]; */ - /* I[0][1] += parts[k].mass * parts[k].x[0] * parts[k].x[1]; */ - /* I[0][2] += parts[k].mass * parts[k].x[0] * parts[k].x[2]; */ - /* I[1][0] += parts[k].mass * parts[k].x[1] * parts[k].x[0]; */ - /* I[1][1] += parts[k].mass * parts[k].x[1] * parts[k].x[1]; */ - /* I[1][2] += parts[k].mass * parts[k].x[1] * parts[k].x[2]; */ - /* I[2][0] += parts[k].mass * parts[k].x[2] * parts[k].x[0]; */ - /* I[2][1] += parts[k].mass * parts[k].x[2] * parts[k].x[1]; */ - /* I[2][2] += parts[k].mass * parts[k].x[2] * parts[k].x[2]; */ - /* } */ - - /* for (k = 0; k < N; ++k) { */ - /* I[0][0] = I[0][0] / root->new.mass - root->new.com[0]*root->new.com[0]; */ - /* I[0][1] = I[0][1] / root->new.mass - root->new.com[0]*root->new.com[1]; */ - /* I[0][2] = I[0][2] / root->new.mass - root->new.com[0]*root->new.com[2]; */ - /* I[1][0] = I[1][0] / root->new.mass - root->new.com[1]*root->new.com[0]; */ - /* I[1][1] = I[1][1] / root->new.mass - root->new.com[1]*root->new.com[1]; */ - /* I[1][2] = I[1][2] / root->new.mass - root->new.com[1]*root->new.com[2]; */ - /* I[2][0] = I[2][0] / root->new.mass - root->new.com[2]*root->new.com[0]; */ - /* I[2][1] = I[2][1] / root->new.mass - root->new.com[2]*root->new.com[1]; */ - /* I[2][2] = I[2][2] / root->new.mass - root->new.com[2]*root->new.com[2]; */ - - /* } */ - - /* message("[check] root mass= %f %f", root->legacy.mass, root->new.mass); */ - /* message("[check] root CoMx= %f %f", root->legacy.com[0], root->new.com[0]); */ - /* message("[check] root CoMy= %f %f", root->legacy.com[1], root->new.com[1]); */ - /* message("[check] root CoMz= %f %f", root->legacy.com[2], root->new.com[2]); */ - /* message(" | %f %f %f |", root->new.I[0][0], root->new.I[0][1], root->new.I[0][2]); */ - /* message("In=| %f %f %f |", root->new.I[1][0], root->new.I[1][1], root->new.I[1][2]); */ - /* message(" | %f %f %f |", root->new.I[2][0], root->new.I[2][1], root->new.I[2][2]); */ - /* message("------------------------------------------------") */ - /* message(" | %f %f %f |", I[0][0], I[0][1], I[0][2]); */ - /* message("I =| %f %f %f |", I[1][0], I[1][1], I[1][2]); */ - /* message(" | %f %f %f |", I[2][0], I[2][1], I[2][2]); */ #if ICHECK >= 0 for (i = 0; i < N; ++i)