diff --git a/.gitignore b/.gitignore index 2a16a4a681b86d30940001fb2ce86dfb9d27a558..4e2f82309eb0dbf56f8eab50d2df14b65865d858 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ examples/*/*/*.hdf5 examples/*/*/*.png examples/*/*/*.txt examples/*/*/used_parameters.yml +examples/*/gravity_checks_*.dat tests/testPair tests/brute_force_standard.dat @@ -88,6 +89,8 @@ theory/SPH/Flavours/sph_flavours.pdf theory/SPH/EoS/eos.pdf theory/SPH/*.pdf theory/paper_pasc/pasc_paper.pdf +theory/Multipoles/fmm.pdf +theory/Multipoles/fmm_standalone.pdf m4/libtool.m4 m4/ltoptions.m4 diff --git a/README b/README index bec538bfc35239945d60487d66ed2e02b5d363a2..6e2b2cb2304aaa1790646de5316b7c08362779bf 100644 --- a/README +++ b/README @@ -31,6 +31,7 @@ Valid options are: -s Run with hydrodynamics. -S Run with stars. -t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified. + -T Print timers every time-step. -v [12] Increase the level of verbosity 1: MPI-rank 0 writes 2: All MPI-ranks write diff --git a/examples/EAGLE_12/eagle_12.yml b/examples/EAGLE_12/eagle_12.yml index 67ce7c530263f7905a0d5147832dfc39148d753d..e4eb43672a2e80da2f7ee558cebbebe92a28e269 100644 --- a/examples/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_12/eagle_12.yml @@ -28,7 +28,8 @@ Statistics: # Parameters for the self-gravity scheme Gravity: - eta: 0.025 # Constant dimensionless multiplier for time integration. + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) epsilon: 0.0001 # Softening length (in internal units). a_smooth: 1000. r_cut: 4. diff --git a/examples/UniformDMBox/uniformBox.yml b/examples/UniformDMBox/uniformBox.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d9ec300164a7bf8f3df257c34ee44d4f77fe94e --- /dev/null +++ b/examples/UniformDMBox/uniformBox.yml @@ -0,0 +1,38 @@ +# Define the system of units to use internally. +InternalUnitSystem: + UnitMass_in_cgs: 1 # Grams + UnitLength_in_cgs: 1 # Centimeters + UnitVelocity_in_cgs: 1 # Centimeters per second + UnitCurrent_in_cgs: 1 # Amperes + UnitTemp_in_cgs: 1 # Kelvin + +# Parameters governing the time integration +TimeIntegration: + time_begin: 0. # The starting time of the simulation (in internal units). + time_end: 1. # The end time of the simulation (in internal units). + dt_min: 1e-6 # The minimal time-step size of the simulation (in internal units). + dt_max: 1e-3 # The maximal time-step size of the simulation (in internal units). + +Scheduler: + max_top_level_cells: 8 + cell_split_size: 50 + +# Parameters governing the snapshots +Snapshots: + basename: uniformDMBox # Common part of the name of output files + time_first: 0. # Time of the first output (in internal units) + delta_time: 0.01 # Time difference between consecutive outputs (in internal units) + +# Parameters for the self-gravity scheme +Gravity: + eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) + epsilon: 0.00001 # Softening length (in internal units). + +# Parameters governing the conserved quantities statistics +Statistics: + delta_time: 1e-2 # Time between statistics output + +# Parameters related to the initial conditions +InitialConditions: + file_name: ./uniformDMBox_100.hdf5 # The file to read diff --git a/examples/main.c b/examples/main.c index 9cde1062a5e0d1d190fd3139400f82db5f445545..5f42f3e06ea46d0c3b73363956f470016e5d1498 100644 --- a/examples/main.c +++ b/examples/main.c @@ -90,6 +90,7 @@ void print_help_message() { printf(" %2s %8s %s\n", "-t", "{int}", "The number of threads to use on each MPI rank. Defaults to 1 if not " "specified."); + printf(" %2s %8s %s\n", "-T", "", "Print timers every time-step."); printf(" %2s %8s %s\n", "-v", "[12]", "Increase the level of verbosity."); printf(" %2s %8s %s\n", "", "", "1: MPI-rank 0 writes "); printf(" %2s %8s %s\n", "", "", "2: All MPI-ranks write"); @@ -604,6 +605,7 @@ int main(int argc, char *argv[]) { printf("# %6s %14s %14s %10s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time", clocks_getunit()); + if (with_verbose_timers) { printf("timers: "); for (int k = 0; k < timer_count; k++) printf("%s\t", timers_names[k]); @@ -626,6 +628,7 @@ int main(int argc, char *argv[]) { for (int k = 0; k < timer_count; k++) printf("%.3f\t", clocks_from_ticks(timers[k])); printf("\n"); + timers_reset(0xFFFFFFFFllu); } #ifdef SWIFT_DEBUG_TASKS diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml index 501863a7047df33592fe77b523fdb211dfb7d16b..14bc60bc1e1c05ecdc66fb7ac828102b1d5748bf 100644 --- a/examples/parameter_example.yml +++ b/examples/parameter_example.yml @@ -52,6 +52,7 @@ SPH: # Parameters for the self-gravity scheme Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. + theta: 0.7 # Opening angle (Multipole acceptance criterion) epsilon: 0.1 # Softening length (in internal units). a_smooth: 1.25 # (Optional) Smoothing scale in top-level cell sizes to smooth the long-range forces over (this is the default value). r_cut: 4.5 # (Optional) Cut-off in number of top-level cells beyond which no FMM forces are computed (this is the default value). diff --git a/examples/plot_gravity_checks.py b/examples/plot_gravity_checks.py new file mode 100644 index 0000000000000000000000000000000000000000..41d9d629899f5c34cbb0264b38547a3439c04bf3 --- /dev/null +++ b/examples/plot_gravity_checks.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python + +import sys +import glob +import re +import numpy as np +import matplotlib.pyplot as plt + +params = {'axes.labelsize': 14, +'axes.titlesize': 18, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 14, +'ytick.labelsize': 14, +'text.usetex': True, +'figure.figsize': (10, 10), +'figure.subplot.left' : 0.06, +'figure.subplot.right' : 0.99 , +'figure.subplot.bottom' : 0.06 , +'figure.subplot.top' : 0.99 , +'figure.subplot.wspace' : 0.14 , +'figure.subplot.hspace' : 0.14 , +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +plt.rcParams.update(params) +plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +min_error = 1e-7 +max_error = 3e-1 +num_bins = 64 + +# Construct the bins +bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) +bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins +bins = 0.5*(bin_edges[1:] + bin_edges[:-1]) +bin_edges = 10**bin_edges +bins = 10**bins + +# Colours +cols = ['#332288', '#88CCEE', '#117733', '#DDCC77', '#CC6677'] + +# Time-step to plot +step = int(sys.argv[1]) + +# Find the files for the different expansion orders +order_list = glob.glob("gravity_checks_swift_step%d_order*.dat"%step) +num_order = len(order_list) + +# Get the multipole orders +order = np.zeros(num_order) +for i in range(num_order): + order[i] = int(order_list[i][32]) +order = sorted(order) +order_list = sorted(order_list) + +# Read the exact accelerations first +data = np.loadtxt('gravity_checks_exact_step%d.dat'%step) +exact_ids = data[:,0] +exact_pos = data[:,1:4] +exact_a = data[:,4:7] +# Sort stuff +sort_index = np.argsort(exact_ids) +exact_ids = exact_ids[sort_index] +exact_pos = exact_pos[sort_index, :] +exact_a = exact_a[sort_index, :] +exact_a_norm = np.sqrt(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2) + +# Start the plot +plt.figure() + +count = 0 + +# Get the Gadget-2 data if existing +gadget2_file_list = glob.glob("forcetest_gadget2.txt") +if len(gadget2_file_list) != 0: + + gadget2_data = np.loadtxt(gadget2_file_list[0]) + gadget2_ids = gadget2_data[:,0] + gadget2_pos = gadget2_data[:,1:4] + gadget2_a_exact = gadget2_data[:,4:7] + gadget2_a_grav = gadget2_data[:, 7:10] + + # Sort stuff + sort_index = np.argsort(gadget2_ids) + gadget2_ids = gadget2_ids[sort_index] + gadget2_pos = gadget2_pos[sort_index, :] + gadget2_a_exact = gadget2_a_exact[sort_index, :] + gadget2_a_grav = gadget2_a_grav[sort_index, :] + + # Cross-checks + if not np.array_equal(exact_ids, gadget2_ids): + print "Comparing different IDs !" + + if np.max(np.abs(exact_pos - gadget2_pos)/np.abs(gadget2_pos)) > 1e-6: + print "Comparing different positions ! max difference:" + index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - gadget2_pos[:,0]**2 - gadget2_pos[:,1]**2 - gadget2_pos[:,2]**2) + print "Gadget2 (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" + + if np.max(np.abs(exact_a - gadget2_a_exact) / np.abs(gadget2_a_exact)) > 2e-6: + print "Comparing different exact accelerations ! max difference:" + index = np.argmax(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2 - gadget2_a_exact[:,0]**2 - gadget2_a_exact[:,1]**2 - gadget2_a_exact[:,2]**2) + print "a_exact --- Gadget2:", gadget2_a_exact[index,:], "exact:", exact_a[index,:] + print "pos --- Gadget2: (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%ids[index], pos[index,:],"\n" + + + # Compute the error norm + diff = gadget2_a_exact - gadget2_a_grav + + norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) + norm_a = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) + + norm_error = norm_diff / norm_a + error_x = abs(diff[:,0]) / norm_a + error_y = abs(diff[:,1]) / norm_a + error_z = abs(diff[:,2]) / norm_a + + # Bin the error + norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + + norm_per99 = np.percentile(norm_error,99) + per99_x = np.percentile(error_x,99) + per99_y = np.percentile(error_y,99) + per99_z = np.percentile(error_z,99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + + print "Gadget-2 ---- " + print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) + print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) + print "Y : median= %f 99%%= %f max= %f"%(median_y, per99_y, max_y) + print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) + print "" + + plt.subplot(221) + plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", alpha=0.8) + plt.semilogx(bins, norm_error_hist, 'k--', label="Gadget-2", alpha=0.8) + plt.subplot(222) + plt.semilogx(bins, error_x_hist, 'k--', label="Gadget-2", alpha=0.8) + plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", alpha=0.8) + plt.subplot(223) + plt.semilogx(bins, error_y_hist, 'k--', label="Gadget-2", alpha=0.8) + plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", alpha=0.8) + plt.subplot(224) + plt.semilogx(bins, error_z_hist, 'k--', label="Gadget-2", alpha=0.8) + plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", alpha=0.8) + + count += 1 + + +# Plot the different histograms +for i in range(num_order): + data = np.loadtxt(order_list[i]) + ids = data[:,0] + pos = data[:,1:4] + a_grav = data[:, 4:7] + + # Sort stuff + sort_index = np.argsort(ids) + ids = ids[sort_index] + pos = pos[sort_index, :] + a_grav = a_grav[sort_index, :] + + # Cross-checks + if not np.array_equal(exact_ids, ids): + print "Comparing different IDs !" + + if np.max(np.abs(exact_pos - pos)/np.abs(pos)) > 1e-6: + print "Comparing different positions ! max difference:" + index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - pos[:,0]**2 - pos[:,1]**2 - pos[:,2]**2) + print "SWIFT (id=%d):"%ids[index], pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" + + + # Compute the error norm + diff = exact_a - a_grav + + norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) + + norm_error = norm_diff / exact_a_norm + error_x = abs(diff[:,0]) / exact_a_norm + error_y = abs(diff[:,1]) / exact_a_norm + error_z = abs(diff[:,2]) / exact_a_norm + + # Bin the error + norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + + norm_per99 = np.percentile(norm_error,99) + per99_x = np.percentile(error_x,99) + per99_y = np.percentile(error_y,99) + per99_z = np.percentile(error_z,99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + + print "Order %d ---- "%order[i] + print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) + print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) + print "Y : median= %f 99%%= %f max= %f"%(median_y, per99_y, max_y) + print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) + print "" + + plt.subplot(221) + plt.semilogx(bins, norm_error_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) + plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", color=cols[i]) + plt.subplot(222) + plt.semilogx(bins, error_x_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) + plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", color=cols[i]) + plt.subplot(223) + plt.semilogx(bins, error_y_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) + plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", color=cols[i]) + plt.subplot(224) + plt.semilogx(bins, error_z_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) + plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", color=cols[i]) + + count += 1 + +plt.subplot(221) +plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") +#plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0,2.5) +plt.legend(loc="upper left") +plt.subplot(222) +plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") +#plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0,1.75) +#plt.legend(loc="center left") +plt.subplot(223) +plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") +#plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0,1.75) +#plt.legend(loc="center left") +plt.subplot(224) +plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") +#plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0,1.75) +#plt.legend(loc="center left") + + + +plt.savefig("gravity_checks_step%d.png"%step, dpi=200) +plt.savefig("gravity_checks_step%d.pdf"%step, dpi=200) diff --git a/src/Makefile.am b/src/Makefile.am index 14e435f663f01d8faa5f12720398b58633300093..1da7a0d955e488c0b96ee209080b5438356a36bc 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -46,7 +46,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ - vector_power.h hydro_space.h sort_part.h + vector_power.h collectgroup.h hydro_space.h sort_part.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ @@ -57,7 +57,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \ statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \ part_type.c xmf.c gravity_properties.c gravity.c \ - hydro_space.c + collectgroup.c hydro_space.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/cell.c b/src/cell.c index a9c3cc9cb94c3a06ce92abc3cb7904100840f630..95e8619d0c392f753aaa3c646868af4e360ef138 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1018,14 +1018,15 @@ void cell_clean_links(struct cell *c, void *data) { } /** - * @brief Checks that a cell is at the current point in time + * @brief Checks that the particles in a cell are at the + * current point in time * * Calls error() if the cell is not at the current time. * * @param c Cell to act upon * @param data The current time on the integer time-line */ -void cell_check_drift_point(struct cell *c, void *data) { +void cell_check_particle_drift_point(struct cell *c, void *data) { #ifdef SWIFT_DEBUG_CHECKS @@ -1057,6 +1058,34 @@ void cell_check_drift_point(struct cell *c, void *data) { #endif } +/** + * @brief Checks that the multipole of a cell is at the current point in time + * + * Calls error() if the cell is not at the current time. + * + * @param c Cell to act upon + * @param data The current time on the integer time-line + */ +void cell_check_multipole_drift_point(struct cell *c, void *data) { + +#ifdef SWIFT_DEBUG_CHECKS + + const integertime_t ti_drift = *(integertime_t *)data; + + /* Only check local cells */ + if (c->nodeID != engine_rank) return; + + if (c->ti_old_multipole != ti_drift) + error( + "Cell multipole in an incorrect time-zone! c->ti_old_multipole=%lld " + "ti_drift=%lld (depth=%d)", + c->ti_old_multipole, ti_drift, c->depth); + +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + /** * @brief Resets all the individual cell task counters to 0. * @@ -1129,13 +1158,24 @@ void cell_check_multipole(struct cell *c, void *data) { /* Now compare the multipole expansion */ if (!gravity_multipole_equal(&ma, c->multipole, tolerance)) { - message("Multipoles are not equal at depth=%d!", c->depth); + message("Multipoles are not equal at depth=%d! tol=%f", c->depth, + tolerance); message("Correct answer:"); gravity_multipole_print(&ma.m_pole); message("Recursive multipole:"); gravity_multipole_print(&c->multipole->m_pole); error("Aborting"); } + + /* Check that the upper limit of r_max is good enough */ + if (!(c->multipole->r_max >= ma.r_max)) { + error("Upper-limit r_max=%e too small. Should be >=%e.", + c->multipole->r_max, ma.r_max); + } else if (c->multipole->r_max * c->multipole->r_max > + 3. * c->width[0] * c->width[0]) { + error("r_max=%e larger than cell diagonal %e.", c->multipole->r_max, + sqrt(3. * c->width[0] * c->width[0])); + } } #else error("Calling debugging code without debugging flag activated."); @@ -1529,17 +1569,15 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { /* Check that we are actually going to move forward. */ if (ti_current < ti_old_multipole) error("Attempt to drift to the past"); + /* Drift the multipole */ + if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt); + /* Are we not in a leaf ? */ if (c->split) { - /* Loop over the progeny and drift the multipoles. */ + /* Loop over the progeny and recurse. */ for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) cell_drift_particles(c->progeny[k], e); - - } else if (ti_current > ti_old_multipole) { - - /* Drift the multipole */ - gravity_drift(c->multipole, dt); + if (c->progeny[k] != NULL) cell_drift_all_multipoles(c->progeny[k], e); } /* Update the time of the last drift */ diff --git a/src/cell.h b/src/cell.h index 54a1dde5776c3b6274dc0486135643e76889c225..64b874095e38d9a3919bbcdf00e48572a976a469 100644 --- a/src/cell.h +++ b/src/cell.h @@ -355,7 +355,8 @@ int cell_are_neighbours(const struct cell *restrict ci, const struct cell *restrict cj); void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); -void cell_check_drift_point(struct cell *c, void *data); +void cell_check_particle_drift_point(struct cell *c, void *data); +void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); diff --git a/src/collectgroup.c b/src/collectgroup.c new file mode 100644 index 0000000000000000000000000000000000000000..0b4ddc405772a45a1e444ef48b65fcb7d37a248f --- /dev/null +++ b/src/collectgroup.c @@ -0,0 +1,197 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "collectgroup.h" + +/* Local headers. */ +#include "engine.h" +#include "error.h" + +#ifdef WITH_MPI + +/* Local collections for MPI reduces. */ +struct mpicollectgroup1 { + size_t updates, g_updates, s_updates; + integertime_t ti_end_min; + int forcerebuild; +}; + +/* Forward declarations. */ +static void mpicollect_create_MPI_type(); + +/** + * @brief MPI datatype for the #mpicollectgroup1 structure. + */ +static MPI_Datatype mpicollectgroup1_type; + +/** + * @brief MPI operator to reduce #mpicollectgroup1 structures. + */ +static MPI_Op mpicollectgroup1_reduce_op; + +#endif + +/** + * @brief Perform any once only initialisations. Must be called once. + */ +void collectgroup_init() { + +#ifdef WITH_MPI + /* Initialise the MPI types. */ + mpicollect_create_MPI_type(); +#endif +} + +/** + * @brief Apply the collectgroup1 values to the engine by copying all the + * values to the engine fields. + * + * @param grp1 The #collectgroup1 + * @param e The #engine + */ +void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e) { + e->ti_end_min = grp1->ti_end_min; + e->ti_end_max = grp1->ti_end_max; + e->ti_beg_max = grp1->ti_beg_max; + e->updates = grp1->updates; + e->g_updates = grp1->g_updates; + e->s_updates = grp1->s_updates; + e->forcerebuild = grp1->forcerebuild; +} + +/** + * @brief Initialises a collectgroup1 struct ready for processing. + * + * @param grp1 The #collectgroup1 to initialise + * @param updates the number of updated hydro particles on this node this step. + * @param g_updates the number of updated gravity particles on this node this + * step. + * @param s_updates the number of updated star particles on this node this step. + * @param ti_end_min the minimum end time for next time step after this step. + * @param ti_end_max the maximum end time for next time step after this step. + * @param ti_beg_max the maximum begin time for next time step after this step. + * @param forcerebuild whether a rebuild is required after this step. + */ +void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, + size_t g_updates, size_t s_updates, + integertime_t ti_end_min, integertime_t ti_end_max, + integertime_t ti_beg_max, int forcerebuild) { + grp1->updates = updates; + grp1->g_updates = g_updates; + grp1->s_updates = s_updates; + grp1->ti_end_min = ti_end_min; + grp1->ti_end_max = ti_end_max; + grp1->ti_beg_max = ti_beg_max; + grp1->forcerebuild = forcerebuild; +} + +/** + * @brief Do any processing necessary to the group before it can be used. + * + * This may involve an MPI reduction across all nodes. + * + * @param grp1 the #collectgroup1 struct already initialised by a call + * to collectgroup1_init. + */ +void collectgroup1_reduce(struct collectgroup1 *grp1) { + +#ifdef WITH_MPI + + /* Populate an MPI group struct and reduce this across all nodes. */ + struct mpicollectgroup1 mpigrp11; + mpigrp11.updates = grp1->updates; + mpigrp11.g_updates = grp1->g_updates; + mpigrp11.s_updates = grp1->s_updates; + mpigrp11.ti_end_min = grp1->ti_end_min; + mpigrp11.forcerebuild = grp1->forcerebuild; + + struct mpicollectgroup1 mpigrp12; + if (MPI_Allreduce(&mpigrp11, &mpigrp12, 1, mpicollectgroup1_type, + mpicollectgroup1_reduce_op, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to reduce mpicollection1."); + + /* And update. */ + grp1->updates = mpigrp12.updates; + grp1->g_updates = mpigrp12.g_updates; + grp1->s_updates = mpigrp12.s_updates; + grp1->ti_end_min = mpigrp12.ti_end_min; + grp1->forcerebuild = mpigrp12.forcerebuild; + +#endif +} + +#ifdef WITH_MPI +/** + * @brief Do the reduction of two structs. + * + * @param mpigrp11 the first struct, this is updated on exit. + * @param mpigrp12 the second struct + */ +static void doreduce1(struct mpicollectgroup1 *mpigrp11, + const struct mpicollectgroup1 *mpigrp12) { + + /* Do what is needed for each part of the collection. */ + /* Sum of updates. */ + mpigrp11->updates += mpigrp12->updates; + mpigrp11->g_updates += mpigrp12->g_updates; + mpigrp11->s_updates += mpigrp12->s_updates; + + /* Minimum end time. */ + mpigrp11->ti_end_min = min(mpigrp11->ti_end_min, mpigrp12->ti_end_min); + + /* Everyone must agree to not rebuild. */ + if (mpigrp11->forcerebuild || mpigrp12->forcerebuild) + mpigrp11->forcerebuild = 1; +} + +/** + * @brief MPI reduce operator for #mpicollectgroup structures. + */ +static void mpicollectgroup1_reduce(void *in, void *inout, int *len, + MPI_Datatype *datatype) { + + for (int i = 0; i < *len; ++i) + doreduce1(&((struct mpicollectgroup1 *)inout)[0], + &((const struct mpicollectgroup1 *)in)[i]); +} + +/** + * @brief Registers any MPI collection types and reduction functions. + */ +static void mpicollect_create_MPI_type() { + + if (MPI_Type_contiguous(sizeof(struct mpicollectgroup1), MPI_BYTE, + &mpicollectgroup1_type) != MPI_SUCCESS || + MPI_Type_commit(&mpicollectgroup1_type) != MPI_SUCCESS) { + error("Failed to create MPI type for mpicollection1."); + } + + /* Create the reduction operation */ + MPI_Op_create(mpicollectgroup1_reduce, 1, &mpicollectgroup1_reduce_op); +} +#endif diff --git a/src/collectgroup.h b/src/collectgroup.h new file mode 100644 index 0000000000000000000000000000000000000000..b0cbf0763d94dec65ea1aae2c521c1c4b9f7bf83 --- /dev/null +++ b/src/collectgroup.h @@ -0,0 +1,55 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_COLLECTGROUP_H +#define SWIFT_COLLECTGROUP_H + +/* Config parameters. */ +#include "../config.h" + +/* Standard headers. */ +#include <stddef.h> + +/* Local headers. */ +#include "timeline.h" + +/* Forward declaration of engine struct (to avoid cyclic include). */ +struct engine; + +/* A collection of global quantities that can be processed at the same time. */ +struct collectgroup1 { + + /* Number of particles updated */ + size_t updates, g_updates, s_updates; + + /* Times for the time-step */ + integertime_t ti_end_min, ti_end_max, ti_beg_max; + + /* Force the engine to rebuild? */ + int forcerebuild; +}; + +void collectgroup_init(); +void collectgroup1_apply(struct collectgroup1 *grp1, struct engine *e); +void collectgroup1_init(struct collectgroup1 *grp1, size_t updates, + size_t g_updates, size_t s_updates, + integertime_t ti_end_min, integertime_t ti_end_max, + integertime_t ti_beg_max, int forcerebuild); +void collectgroup1_reduce(struct collectgroup1 *grp1); + +#endif /* SWIFT_COLLECTGROUP_H */ diff --git a/src/engine.c b/src/engine.c index 496e1c4467ef2ba5cd857ca339165ec7b6d51af9..c0b2bde3643c3378b040f6fc95e48a388356b325 100644 --- a/src/engine.c +++ b/src/engine.c @@ -836,7 +836,8 @@ void engine_repartition(struct engine *e) { fflush(stdout); /* Check that all cells have been drifted to the current time */ - space_check_drift_point(e->s, e->ti_old); + space_check_drift_point(e->s, e->ti_old, + e->policy & engine_policy_self_gravity); #endif /* Clear the repartition flag. */ @@ -1635,9 +1636,9 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, * @brief Constructs the top-level tasks for the short-range gravity * interactions. * - * All top-cells get a self task. - * All neighbouring pairs get a pair task. - * All non-neighbouring pairs within a range of 6 cells get a M-M task. + * - All top-cells get a self task. + * - All pairs within range according to the multipole acceptance + * criterion get a pair task. * * @param e The #engine. */ @@ -1646,6 +1647,7 @@ void engine_make_self_gravity_tasks(struct engine *e) { struct space *s = e->s; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; + const double theta_crit_inv = e->gravity_properties->theta_crit_inv; struct cell *cells = s->cells_top; const int nr_cells = s->nr_cells; @@ -1656,13 +1658,14 @@ void engine_make_self_gravity_tasks(struct engine *e) { /* Skip cells without gravity particles */ if (ci->gcount == 0) continue; - /* Is that neighbour local ? */ + /* Is that cell local ? */ if (ci->nodeID != nodeID) continue; /* If the cells is local build a self-interaction */ scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL, 0); + /* Loop over every other cell */ for (int cjd = cid + 1; cjd < nr_cells; ++cjd) { struct cell *cj = &cells[cjd]; @@ -1671,9 +1674,11 @@ void engine_make_self_gravity_tasks(struct engine *e) { if (cj->gcount == 0) continue; /* Is that neighbour local ? */ - if (cj->nodeID != nodeID) continue; + if (cj->nodeID != nodeID) continue; // MATTHIEU - if (cell_are_neighbours(ci, cj)) + /* Are the cells to close for a MM interaction ? */ + if (!gravity_multipole_accept(ci->multipole, cj->multipole, + theta_crit_inv)) scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, cj, 1); } @@ -2711,7 +2716,8 @@ void engine_rebuild(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_old); + space_check_drift_point(e->s, e->ti_old, + e->policy & engine_policy_self_gravity); #endif if (e->verbose) @@ -2733,7 +2739,8 @@ void engine_prepare(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_old); + space_check_drift_point(e->s, e->ti_old, + e->policy & engine_policy_self_gravity); } #endif @@ -2849,12 +2856,23 @@ void engine_collect_kick(struct cell *c) { } /** - * @brief Collects the next time-step by making each super-cell recurse - * to collect the minimal of ti_end and the number of updated particles. + * @brief Collects the next time-step and rebuild flag. + * + * The next time-step is determined by making each super-cell recurse to + * collect the minimal of ti_end and the number of updated particles. When in + * MPI mode this routines reduces these across all nodes and also collects the + * forcerebuild flag -- this is so that we only use a single collective MPI + * call per step for all these values. + * + * Note that the results are stored in e->collect_group1 struct not in the + * engine fields, unless apply is true. These can be applied field-by-field + * or all at once using collectgroup1_copy(); * * @param e The #engine. + * @param apply whether to apply the results to the engine or just keep in the + * group1 struct. */ -void engine_collect_timestep(struct engine *e) { +void engine_collect_timestep_and_rebuild(struct engine *e, int apply) { const ticks tic = getticks(); int updates = 0, g_updates = 0, s_updates = 0; @@ -2884,37 +2902,59 @@ void engine_collect_timestep(struct engine *e) { } } -/* Aggregate the data from the different nodes. */ + /* Store these in the temporary collection group. */ + collectgroup1_init(&e->collect_group1, updates, g_updates, s_updates, + ti_end_min, ti_end_max, ti_beg_max, e->forcerebuild); + +/* Aggregate collective data from the different nodes for this step. */ #ifdef WITH_MPI + collectgroup1_reduce(&e->collect_group1); + +#ifdef SWIFT_DEBUG_CHECKS { + /* Check the above using the original MPI calls. */ integertime_t in_i[1], out_i[1]; in_i[0] = 0; out_i[0] = ti_end_min; if (MPI_Allreduce(out_i, in_i, 1, MPI_LONG_LONG_INT, MPI_MIN, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate t_end_min."); - ti_end_min = in_i[0]; - } - { + error("Failed to aggregate ti_end_min."); + if (in_i[0] != (long long)e->collect_group1.ti_end_min) + error("Failed to get same ti_end_min, is %lld, should be %lld", in_i[0], + e->collect_group1.ti_end_min); + long long in_ll[3], out_ll[3]; out_ll[0] = updates; out_ll[1] = g_updates; out_ll[2] = s_updates; if (MPI_Allreduce(out_ll, in_ll, 3, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate energies."); - updates = in_ll[0]; - g_updates = in_ll[1]; - s_updates = in_ll[2]; + error("Failed to aggregate particle counts."); + if (in_ll[0] != (long long)e->collect_group1.updates) + error("Failed to get same updates, is %lld, should be %ld", in_ll[0], + e->collect_group1.updates); + if (in_ll[1] != (long long)e->collect_group1.g_updates) + error("Failed to get same g_updates, is %lld, should be %ld", in_ll[1], + e->collect_group1.g_updates); + if (in_ll[2] != (long long)e->collect_group1.s_updates) + error("Failed to get same s_updates, is %lld, should be %ld", in_ll[2], + e->collect_group1.s_updates); + + int buff = 0; + if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, + MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate the rebuild flag across nodes."); + if (!!buff != !!e->collect_group1.forcerebuild) + error( + "Failed to get same rebuild flag from all nodes, is %d," + "should be %d", + buff, e->collect_group1.forcerebuild); } +#endif #endif - e->ti_end_min = ti_end_min; - e->ti_end_max = ti_end_max; - e->ti_beg_max = ti_beg_max; - e->updates = updates; - e->g_updates = g_updates; - e->s_updates = s_updates; + /* Apply to the engine, if requested. */ + if (apply) collectgroup1_apply(&e->collect_group1, e); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2934,7 +2974,8 @@ void engine_print_stats(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_current); + space_check_drift_point(e->s, e->ti_current, + e->policy & engine_policy_self_gravity); /* Be verbose about this */ message("Saving statistics at t=%e.", e->time); @@ -3165,7 +3206,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { #endif /* Recover the (integer) end of the next time-step */ - engine_collect_timestep(e); + engine_collect_timestep_and_rebuild(e, 1); clocks_gettime(&time2); @@ -3243,7 +3284,10 @@ void engine_step(struct engine *e) { for (int i = 0; i < e->s->nr_cells; ++i) num_gpart_mpole += e->s->cells_top[i].multipole->m_pole.num_gpart; if (num_gpart_mpole != e->s->nr_gparts) - error("Multipoles don't contain the total number of gpart"); + error( + "Multipoles don't contain the total number of gpart mpoles=%zd " + "ngparts=%zd", + num_gpart_mpole, e->s->nr_gparts); } #endif @@ -3252,6 +3296,9 @@ void engine_step(struct engine *e) { gravity_exact_force_compute(e->s, e); #endif + /* Do we need to drift the top-level multipoles ? */ + if (e->policy & engine_policy_self_gravity) engine_drift_top_multipoles(e); + /* Start all the tasks. */ TIMER_TIC; engine_launch(e, e->nr_threads); @@ -3262,14 +3309,16 @@ void engine_step(struct engine *e) { gravity_exact_force_check(e->s, e, 1e-1); #endif -/* Collect the values of rebuild from all nodes. */ -#ifdef WITH_MPI - int buff = 0; - if (MPI_Allreduce(&e->forcerebuild, &buff, 1, MPI_INT, MPI_MAX, - MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to aggregate the rebuild flag across nodes."); - e->forcerebuild = buff; -#endif + if (!(e->policy & engine_policy_hydro) && + (e->policy & engine_policy_self_gravity) && e->step % 20 == 0) + e->forcerebuild = 1; + + /* Collect the values of rebuild from all nodes and recover the (integer) + * end of the next time-step. Do these together to reduce the collective MPI + * calls per step, but some of the gathered information is not applied just + * yet (in case we save a snapshot or drift). */ + engine_collect_timestep_and_rebuild(e, 0); + e->forcerebuild = e->collect_group1.forcerebuild; /* Save some statistics ? */ if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { @@ -3305,8 +3354,8 @@ void engine_step(struct engine *e) { e->timeLastStatistics += e->deltaTimeStatistics; } - /* Recover the (integer) end of the next time-step */ - engine_collect_timestep(e); + /* Now apply all the collected time step updates and particle counts. */ + collectgroup1_apply(&e->collect_group1, e); TIMER_TOC2(timer_step); @@ -3343,8 +3392,8 @@ void engine_unskip(struct engine *e) { } /** - * @brief Mapper function to drift ALL particle types and multipoles forward in - * time. + * @brief Mapper function to drift *all* particle types and multipoles forward + * in time. * * @param map_data An array of #cell%s. * @param num_elements Chunk size. @@ -3362,7 +3411,7 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, /* Drift all the particles */ cell_drift_particles(c, e); - /* Drift the multipole */ + /* Drift the multipoles */ if (e->policy & engine_policy_self_gravity) cell_drift_all_multipoles(c, e); } @@ -3370,7 +3419,8 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, } /** - * @brief Drift *all* particles forward to the current time. + * @brief Drift *all* particles and multipoles at all levels + * forward to the current time. * * @param e The #engine. */ @@ -3387,7 +3437,54 @@ void engine_drift_all(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Check that all cells have been drifted to the current time. */ - space_check_drift_point(e->s, e->ti_current); + space_check_drift_point(e->s, e->ti_current, + e->policy & engine_policy_self_gravity); +#endif + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + +/** + * @brief Mapper function to drift *all* top-level multipoles forward in + * time. + * + * @param map_data An array of #cell%s. + * @param num_elements Chunk size. + * @param extra_data Pointer to an #engine. + */ +void engine_do_drift_top_multipoles_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL && c->nodeID == e->nodeID) { + + /* Drift the multipole at this level only */ + cell_drift_multipole(c, e); + } + } +} + +/** + * @brief Drift *all* top-level multipoles forward to the current time. + * + * @param e The #engine. + */ +void engine_drift_top_multipoles(struct engine *e) { + + const ticks tic = getticks(); + + threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 10, e); + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that all cells have been drifted to the current time. */ + space_check_top_multipoles_drift_point(e->s, e->ti_current); #endif if (e->verbose) @@ -3603,7 +3700,8 @@ void engine_dump_snapshot(struct engine *e) { /* Check that all cells have been drifted to the current time. * That can include cells that have not * previously been active on this rank. */ - space_check_drift_point(e->s, e->ti_current); + space_check_drift_point(e->s, e->ti_current, + e->policy & engine_policy_self_gravity); /* Be verbose about this */ message("writing snapshot at t=%e.", e->time); @@ -4048,6 +4146,9 @@ void engine_init(struct engine *e, struct space *s, stats_create_MPI_type(); #endif + /* Initialise the collection group. */ + collectgroup_init(); + /* Initialize the threadpool. */ threadpool_init(&e->threadpool, e->nr_threads); diff --git a/src/engine.h b/src/engine.h index a0e32ad15b79c364d13d19589f8462ff8705ee29..22ee122bb082895131584942bde50509952b98ff 100644 --- a/src/engine.h +++ b/src/engine.h @@ -38,6 +38,7 @@ /* Includes. */ #include "clocks.h" +#include "collectgroup.h" #include "cooling_struct.h" #include "gravity_properties.h" #include "parser.h" @@ -243,6 +244,10 @@ struct engine { /* The (parsed) parameter file */ const struct swift_params *parameter_file; + + /* Temporary struct to hold a group of deferable properties (in MPI mode + * these are reduced together, but may not be required just yet). */ + struct collectgroup1 collect_group1; }; /* Function prototypes. */ @@ -250,6 +255,7 @@ void engine_barrier(struct engine *e, int tid); void engine_compute_next_snapshot_time(struct engine *e); void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e); +void engine_drift_top_multipoles(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, diff --git a/src/gravity.c b/src/gravity.c index 369c6b1b0ab0458f7c6ab5057261a7cade97a64c..405e52cc116416c27ec236340a0358443e255384 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -22,6 +22,7 @@ /* Some standard headers. */ #include <stdio.h> +#include <unistd.h> /* This object's header. */ #include "gravity.h" @@ -29,6 +30,57 @@ /* Local headers. */ #include "active.h" #include "error.h" +#include "version.h" + +/** + * @brief Checks whether the file containing the exact accelerations for + * the current choice of parameters already exists. + * + * @param e The #engine. + */ +int gravity_exact_force_file_exits(const struct engine *e) { + +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + + /* File name */ + char file_name[100]; + sprintf(file_name, "gravity_checks_exact_step%d.dat", e->step); + + /* Does the file exist ? */ + if (access(file_name, R_OK | W_OK) == 0) { + + /* Let's check whether the header matches the parameters of this run */ + FILE *file = fopen(file_name, "r"); + if (!file) error("Problem reading gravity_check file"); + + char line[100]; + char dummy1[10], dummy2[10]; + double epsilon, newton_G; + int N; + /* Reads file header */ + if (fgets(line, 100, file) != line) error("Problem reading title"); + if (fgets(line, 100, file) != line) error("Problem reading G"); + sscanf(line, "%s %s %le", dummy1, dummy2, &newton_G); + if (fgets(line, 100, file) != line) error("Problem reading N"); + sscanf(line, "%s %s %d", dummy1, dummy2, &N); + if (fgets(line, 100, file) != line) error("Problem reading epsilon"); + sscanf(line, "%s %s %le", dummy1, dummy2, &epsilon); + fclose(file); + + /* Check whether it matches the current parameters */ + if (N == SWIFT_GRAVITY_FORCE_CHECKS && + (fabs(epsilon - e->gravity_properties->epsilon) / epsilon < 1e-5) && + (fabs(newton_G - e->physical_constants->const_newton_G) / newton_G < + 1e-5)) { + return 1; + } + } + return 0; +#else + error("Gravity checking function called without the corresponding flag."); + return 0; +#endif +} /** * @brief Run a brute-force gravity calculation for a subset of particles. @@ -47,6 +99,13 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { const double const_G = e->physical_constants->const_newton_G; int counter = 0; + /* Let's start by checking whether we already computed these forces */ + if (gravity_exact_force_file_exits(e)) { + message("Exact accelerations already computed. Skipping calculation."); + return; + } + + /* No matching file present ? Do it then */ for (size_t i = 0; i < s->nr_gparts; ++i) { struct gpart *gpi = &s->gparts[i]; @@ -72,8 +131,8 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { gpi->x[2] - gpj->x[2]}; // z const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const double r = sqrtf(r2); - const double ir = 1.f / r; + const double r = sqrt(r2); + const double ir = 1. / r; const double mj = gpj->mass; const double hi = gpi->epsilon; double f; @@ -86,15 +145,17 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { } else { - const double hi_inv = 1.f / hi; + const double hi_inv = 1. / hi; const double hi_inv3 = hi_inv * hi_inv * hi_inv; const double ui = r * hi_inv; - float W; + double W; - kernel_grav_eval(ui, &W); + kernel_grav_eval_double(ui, &W); /* Get softened gravity */ f = mj * hi_inv3 * W * f_lr; + + // printf("r=%e hi=%e W=%e fac=%e\n", r, hi, W, f); } const double fdx[3] = {f * dx[0], f * dx[1], f * dx[2]}; @@ -138,25 +199,24 @@ void gravity_exact_force_check(struct space *s, const struct engine *e, #ifdef SWIFT_GRAVITY_FORCE_CHECKS - int counter = 0; - - /* Some accumulators */ - float err_rel_max = 0.f; - float err_rel_min = FLT_MAX; - float err_rel_mean = 0.f; - float err_rel_mean2 = 0.f; - float err_rel_std = 0.f; - - char file_name[100]; - sprintf(file_name, "gravity_checks_step%d_order%d.dat", e->step, + /* File name */ + char file_name_swift[100]; + sprintf(file_name_swift, "gravity_checks_swift_step%d_order%d.dat", e->step, SELF_GRAVITY_MULTIPOLE_ORDER); - FILE *file = fopen(file_name, "w"); - fprintf(file, "# Gravity accuracy test G = %16.8e\n", - e->physical_constants->const_newton_G); - fprintf(file, "# %16s %16s %16s %16s %16s %16s %16s %16s %16s %16s\n", "id", - "pos[0]", "pos[1]", "pos[2]", "a_exact[0]", "a_exact[1]", - "a_exact[2]", "a_grav[0]", "a_grav[1]", "a_grav[2]"); + /* Creare files and write header */ + FILE *file_swift = fopen(file_name_swift, "w"); + fprintf(file_swift, "# Gravity accuracy test - SWIFT FORCES\n"); + fprintf(file_swift, "# G= %16.8e\n", e->physical_constants->const_newton_G); + fprintf(file_swift, "# N= %d\n", SWIFT_GRAVITY_FORCE_CHECKS); + fprintf(file_swift, "# epsilon=%16.8e\n", e->gravity_properties->epsilon); + fprintf(file_swift, "# theta=%16.8e\n", e->gravity_properties->theta_crit); + fprintf(file_swift, "# Git Branch: %s\n", git_branch()); + fprintf(file_swift, "# Git Revision: %s\n", git_revision()); + fprintf(file_swift, "# %16s %16s %16s %16s %16s %16s %16s\n", "id", "pos[0]", + "pos[1]", "pos[2]", "a_swift[0]", "a_swift[1]", "a_swift[2]"); + + /* Output particle SWIFT accelerations */ for (size_t i = 0; i < s->nr_gparts; ++i) { struct gpart *gpi = &s->gparts[i]; @@ -165,63 +225,53 @@ void gravity_exact_force_check(struct space *s, const struct engine *e, if (gpi->id_or_neg_offset % SWIFT_GRAVITY_FORCE_CHECKS == 0 && gpart_is_starting(gpi, e)) { - fprintf(file, - "%18lld %16.8e %16.8e %16.8e %16.8e %16.8e %16.8e %16.8e %16.8e " - "%16.8e \n", + fprintf(file_swift, "%18lld %16.8e %16.8e %16.8e %16.8e %16.8e %16.8e \n", gpi->id_or_neg_offset, gpi->x[0], gpi->x[1], gpi->x[2], - gpi->a_grav_exact[0], gpi->a_grav_exact[1], gpi->a_grav_exact[2], gpi->a_grav[0], gpi->a_grav[1], gpi->a_grav[2]); - - const float diff[3] = {gpi->a_grav[0] - gpi->a_grav_exact[0], - gpi->a_grav[1] - gpi->a_grav_exact[1], - gpi->a_grav[2] - gpi->a_grav_exact[2]}; - - const float diff_norm = - sqrtf(diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]); - const float a_norm = sqrtf(gpi->a_grav_exact[0] * gpi->a_grav_exact[0] + - gpi->a_grav_exact[1] * gpi->a_grav_exact[1] + - gpi->a_grav_exact[2] * gpi->a_grav_exact[2]); - - /* Compute relative error */ - const float err_rel = diff_norm / a_norm; - - /* Check that we are not above tolerance */ - if (err_rel > rel_tol) { - message( - "Error too large ! gp->a_grav=[%3.6e %3.6e %3.6e] " - "gp->a_exact=[%3.6e %3.6e %3.6e], " - "gp->num_interacted=%lld, err=%f", - gpi->a_grav[0], gpi->a_grav[1], gpi->a_grav[2], - gpi->a_grav_exact[0], gpi->a_grav_exact[1], gpi->a_grav_exact[2], - gpi->num_interacted, err_rel); - - continue; - } - - /* Construct some statistics */ - err_rel_max = max(err_rel_max, fabsf(err_rel)); - err_rel_min = min(err_rel_min, fabsf(err_rel)); - err_rel_mean += err_rel; - err_rel_mean2 += err_rel * err_rel; - counter++; } } + message("Written SWIFT accelerations in file '%s'.", file_name_swift); + /* Be nice */ - fclose(file); + fclose(file_swift); - /* Final operation on the stats */ - if (counter > 0) { - err_rel_mean /= counter; - err_rel_mean2 /= counter; - err_rel_std = sqrtf(err_rel_mean2 - err_rel_mean * err_rel_mean); - } + if (!gravity_exact_force_file_exits(e)) { - /* Report on the findings */ - message("Checked gravity for %d gparts.", counter); - message("Error on |a_grav|: min=%e max=%e mean=%e std=%e", err_rel_min, - err_rel_max, err_rel_mean, err_rel_std); + char file_name_exact[100]; + sprintf(file_name_exact, "gravity_checks_exact_step%d.dat", e->step); + FILE *file_exact = fopen(file_name_exact, "w"); + fprintf(file_exact, "# Gravity accuracy test - EXACT FORCES\n"); + fprintf(file_exact, "# G= %16.8e\n", e->physical_constants->const_newton_G); + fprintf(file_exact, "# N= %d\n", SWIFT_GRAVITY_FORCE_CHECKS); + fprintf(file_exact, "# epsilon=%16.8e\n", e->gravity_properties->epsilon); + fprintf(file_exact, "# theta=%16.8e\n", e->gravity_properties->theta_crit); + fprintf(file_exact, "# %16s %16s %16s %16s %16s %16s %16s\n", "id", + "pos[0]", "pos[1]", "pos[2]", "a_exact[0]", "a_exact[1]", + "a_exact[2]"); + + /* Output particle exact accelerations */ + for (size_t i = 0; i < s->nr_gparts; ++i) { + + struct gpart *gpi = &s->gparts[i]; + + /* Is the particle was active and part of the subset to be tested ? */ + if (gpi->id_or_neg_offset % SWIFT_GRAVITY_FORCE_CHECKS == 0 && + gpart_is_starting(gpi, e)) { + + fprintf( + file_exact, "%18lld %16.8e %16.8e %16.8e %16.8e %16.8e %16.8e \n", + gpi->id_or_neg_offset, gpi->x[0], gpi->x[1], gpi->x[2], + gpi->a_grav_exact[0], gpi->a_grav_exact[1], gpi->a_grav_exact[2]); + } + } + + message("Written exact accelerations in file '%s'.", file_name_exact); + + /* Be nice */ + fclose(file_exact); + } #else error("Gravity checking function called without the corresponding flag."); #endif diff --git a/src/gravity_properties.c b/src/gravity_properties.c index 6c1ec1f6d9e80d46278892cc7a2081b2cadad923..f52029fa1543ad1f8d0121c8c4e6d362227f4c53 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -47,8 +47,14 @@ void gravity_props_init(struct gravity_props *p, /* Time integration */ p->eta = parser_get_param_float(params, "Gravity:eta"); + /* Opening angle */ + p->theta_crit = parser_get_param_double(params, "Gravity:theta"); + p->theta_crit_inv = 1. / p->theta_crit; + /* Softening lengths */ - p->epsilon = parser_get_param_float(params, "Gravity:epsilon"); + p->epsilon = parser_get_param_double(params, "Gravity:epsilon"); + p->epsilon2 = p->epsilon * p->epsilon; + p->epsilon_inv = 1. / p->epsilon; } void gravity_props_print(const struct gravity_props *p) { @@ -57,7 +63,9 @@ void gravity_props_print(const struct gravity_props *p) { message("Self-gravity time integration: eta=%.4f", p->eta); - message("Self-gravity softening: epsilon=%.4f", p->epsilon); + message("Self-gravity opening angle: theta=%.4f", p->theta_crit); + + message("Self-gravity softening: epsilon=%.4f", p->epsilon); if (p->a_smooth != gravity_props_default_a_smooth) message("Self-gravity smoothing-scale: a_smooth=%f", p->a_smooth); @@ -71,7 +79,8 @@ void gravity_props_print_snapshot(hid_t h_grpgrav, const struct gravity_props *p) { io_write_attribute_f(h_grpgrav, "Time integration eta", p->eta); - io_write_attribute_f(h_grpgrav, "Softening", p->epsilon); + io_write_attribute_f(h_grpgrav, "Softening length", p->epsilon); + io_write_attribute_f(h_grpgrav, "Opening angle", p->theta_crit); io_write_attribute_f(h_grpgrav, "MM a_smooth", p->a_smooth); io_write_attribute_f(h_grpgrav, "MM r_cut", p->r_cut); } diff --git a/src/gravity_properties.h b/src/gravity_properties.h index 6fde91b1d8f2d32a27ec278d5e42f91e5fe930a6..be26f0d1d23b8cec71fa3cbbeedac9f61f337b2c 100644 --- a/src/gravity_properties.h +++ b/src/gravity_properties.h @@ -38,11 +38,23 @@ struct gravity_props { float a_smooth; float r_cut; - /* Time integration parameters */ + /*! Time integration dimensionless multiplier */ float eta; - /* Softening lengths */ - float epsilon; + /*! Tree opening angle (Multipole acceptance criterion) */ + double theta_crit; + + /*! Inverse of opening angle */ + double theta_crit_inv; + + /*! Softening length */ + double epsilon; + + /*! Square of softening length */ + double epsilon2; + + /*! Inverse of softening length */ + double epsilon_inv; }; void gravity_props_print(const struct gravity_props *p); diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index a9425c7dda8f864a1d6b9c2a658b308c7e7e0624..4a7588d312c381ef60fb97c43f8fadb1e03dfead 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -87,4 +87,49 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval( *W = w / (u * u * u); } -#endif // SWIFT_KERNEL_GRAVITY_H +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + +__attribute__((always_inline)) INLINE static void kernel_grav_eval_double( + double u, double *const W) { + + static const double kernel_grav_coeffs_double[(kernel_grav_degree + 1) * + (kernel_grav_ivals + 1)] + __attribute__((aligned(16))) = {32., + -38.4, + 0., + 10.66666667, + 0., + 0., + 0., /* 0 < u < 0.5 */ + -10.66666667, + 38.4, + -48., + 21.3333333, + 0., + 0., + -0.066666667, /* 0.5 < u < 1 */ + 0., + 0., + 0., + 0., + 0., + 0., + 0.}; /* 1 < u */ + + /* Pick the correct branch of the kernel */ + const int ind = (int)min(u * (double)kernel_grav_ivals, kernel_grav_ivals); + const double *const coeffs = + &kernel_grav_coeffs_double[ind * (kernel_grav_degree + 1)]; + + /* First two terms of the polynomial ... */ + double w = coeffs[0] * u + coeffs[1]; + + /* ... and the rest of them */ + for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; + + /* Return everything */ + *W = w / (u * u * u); +} +#endif /* SWIFT_GRAVITY_FORCE_CHECKS */ + +#endif /* SWIFT_KERNEL_GRAVITY_H */ diff --git a/src/multipole.h b/src/multipole.h index 0daef6de24039afe63f529ee53f0afda761bc6c3..a633554490fe6226f8fe42befb7cb4eaf1e5135f 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -33,6 +33,7 @@ #include "const.h" #include "error.h" #include "gravity_derivatives.h" +#include "gravity_properties.h" #include "inline.h" #include "kernel_gravity.h" #include "minmax.h" @@ -164,20 +165,26 @@ struct multipole { */ struct gravity_tensors { - /*! Linking pointer for "memory management". */ - struct gravity_tensors *next; + union { - /*! Centre of mass of the matter dsitribution */ - double CoM[3]; + /*! Linking pointer for "memory management". */ + struct gravity_tensors *next; - /*! The actual content */ - struct { + /*! The actual content */ + struct { - /*! Multipole mass */ - struct multipole m_pole; + /*! Centre of mass of the matter dsitribution */ + double CoM[3]; - /*! Field tensor for the potential */ - struct grav_tensor pot; + /*! Upper limit of the CoM<->gpart distance */ + double r_max; + + /*! Multipole mass */ + struct multipole m_pole; + + /*! Field tensor for the potential */ + struct grav_tensor pot; + }; }; } SWIFT_STRUCT_ALIGN; @@ -200,12 +207,25 @@ INLINE static void gravity_reset(struct gravity_tensors *m) { */ INLINE static void gravity_drift(struct gravity_tensors *m, double dt) { + const double dx = m->m_pole.vel[0] * dt; + const double dy = m->m_pole.vel[1] * dt; + const double dz = m->m_pole.vel[2] * dt; + /* Move the whole thing according to bulk motion */ - m->CoM[0] += m->m_pole.vel[0] * dt; - m->CoM[1] += m->m_pole.vel[1] * dt; - m->CoM[2] += m->m_pole.vel[2] * dt; + m->CoM[0] += dx; + m->CoM[1] += dy; + m->CoM[2] += dz; + + /* Conservative change in maximal radius containing all gpart */ + /* MATTHIEU: Use gpart->x_diff here ? */ + m->r_max += sqrt(dx * dx + dy * dy + dz * dz); } +/** + * @brief Zeroes all the fields of a field tensor + * + * @param l The field tensor. + */ INLINE static void gravity_field_tensors_init(struct grav_tensor *l) { bzero(l, sizeof(struct grav_tensor)); @@ -354,6 +374,16 @@ INLINE static void gravity_field_tensors_print(const struct grav_tensor *l) { #endif } +/** + * @brief Zeroes all the fields of a multipole. + * + * @param m The multipole + */ +INLINE static void gravity_multipole_init(struct multipole *m) { + + bzero(m, sizeof(struct multipole)); +} + /** * @brief Prints the content of a #multipole to stdout. * @@ -948,9 +978,9 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, double com[3] = {0.0, 0.0, 0.0}; double vel[3] = {0.f, 0.f, 0.f}; - /* Collect the particle data. */ + /* Collect the particle data for CoM. */ for (int k = 0; k < gcount; k++) { - const float m = gparts[k].mass; + const double m = gparts[k].mass; mass += m; com[0] += gparts[k].x[0] * m; @@ -970,7 +1000,8 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, vel[1] *= imass; vel[2] *= imass; -/* Prepare some local counters */ + /* Prepare some local counters */ + double r_max2 = 0.; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 double M_100 = 0., M_010 = 0., M_001 = 0.; #endif @@ -1007,11 +1038,15 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, /* Construce the higher order terms */ for (int k = 0; k < gcount; k++) { -#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - const double m = gparts[k].mass; const double dx[3] = {gparts[k].x[0] - com[0], gparts[k].x[1] - com[1], gparts[k].x[2] - com[2]}; + /* Maximal distance CoM<->gpart */ + r_max2 = max(r_max2, dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]); + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + const double m = gparts[k].mass; + /* 1st order terms */ M_100 += -m * X_100(dx); M_010 += -m * X_010(dx); @@ -1096,6 +1131,7 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, /* Store the data on the multipole. */ m->m_pole.M_000 = mass; + m->r_max = sqrt(r_max2); m->CoM[0] = com[0]; m->CoM[1] = com[1]; m->CoM[2] = com[2]; @@ -1456,549 +1492,557 @@ INLINE static void gravity_M2M(struct multipole *m_a, * @param m_a The multipole creating the field. * @param pos_b The position of the field tensor. * @param pos_a The position of the multipole. + * @param props The #gravity_props of this calculation. * @param periodic Is the calculation periodic ? */ INLINE static void gravity_M2L(struct grav_tensor *l_b, const struct multipole *m_a, const double pos_b[3], const double pos_a[3], + const struct gravity_props *props, int periodic) { - double dx, dy, dz; - if (periodic) { - dx = box_wrap(pos_b[0] - pos_a[0], 0., 1.); - dy = box_wrap(pos_b[1] - pos_a[1], 0., 1.); - dz = box_wrap(pos_b[2] - pos_a[2], 0., 1.); - } else { - dx = pos_b[0] - pos_a[0]; - dy = pos_b[1] - pos_a[1]; - dz = pos_b[2] - pos_a[2]; - } - const double r2 = dx * dx + dy * dy + dz * dz; + /* Compute distance vector */ + const double dx = + periodic ? box_wrap(pos_b[0] - pos_a[0], 0., 1.) : pos_b[0] - pos_a[0]; + const double dy = + periodic ? box_wrap(pos_b[1] - pos_a[1], 0., 1.) : pos_b[1] - pos_a[1]; + const double dz = + periodic ? box_wrap(pos_b[2] - pos_a[2], 0., 1.) : pos_b[2] - pos_a[2]; + /* Compute distance */ + const double r2 = dx * dx + dy * dy + dz * dz; const double r_inv = 1. / sqrt(r2); - /* 0th order term */ - l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv); +#ifdef SWIFT_DEBUG_CHECKS + /* Count interactions */ + l_b->num_interacted += m_a->num_gpart; +#endif + + /* Un-softened case */ + if (r2 > props->epsilon2) { + + /* 0th order term */ + l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv); #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* 1st order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_100 * D_100(dx, dy, dz, r_inv) + - m_a->M_010 * D_010(dx, dy, dz, r_inv) + - m_a->M_001 * D_001(dx, dy, dz, r_inv); + /* 1st order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_100 * D_100(dx, dy, dz, r_inv) + + m_a->M_010 * D_010(dx, dy, dz, r_inv) + + m_a->M_001 * D_001(dx, dy, dz, r_inv); - /* 1st order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_000 * D_100(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_000 * D_010(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_000 * D_001(dx, dy, dz, r_inv); + /* 1st order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_000 * D_100(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_000 * D_010(dx, dy, dz, r_inv); + l_b->F_001 += m_a->M_000 * D_001(dx, dy, dz, r_inv); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - /* 2nd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_200 * D_200(dx, dy, dz, r_inv) + - m_a->M_020 * D_020(dx, dy, dz, r_inv) + - m_a->M_002 * D_002(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_110 * D_110(dx, dy, dz, r_inv) + - m_a->M_101 * D_101(dx, dy, dz, r_inv) + - m_a->M_011 * D_011(dx, dy, dz, r_inv); - - /* 2nd order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_100 * D_200(dx, dy, dz, r_inv) + - m_a->M_010 * D_110(dx, dy, dz, r_inv) + - m_a->M_001 * D_101(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_100 * D_110(dx, dy, dz, r_inv) + - m_a->M_010 * D_020(dx, dy, dz, r_inv) + - m_a->M_001 * D_011(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_100 * D_101(dx, dy, dz, r_inv) + - m_a->M_010 * D_011(dx, dy, dz, r_inv) + - m_a->M_001 * D_002(dx, dy, dz, r_inv); - - /* 2nd order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_000 * D_200(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_000 * D_020(dx, dy, dz, r_inv); - l_b->F_002 += m_a->M_000 * D_002(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_000 * D_110(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_000 * D_101(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_000 * D_011(dx, dy, dz, r_inv); + /* 2nd order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_200 * D_200(dx, dy, dz, r_inv) + + m_a->M_020 * D_020(dx, dy, dz, r_inv) + + m_a->M_002 * D_002(dx, dy, dz, r_inv); + l_b->F_000 += m_a->M_110 * D_110(dx, dy, dz, r_inv) + + m_a->M_101 * D_101(dx, dy, dz, r_inv) + + m_a->M_011 * D_011(dx, dy, dz, r_inv); + + /* 2nd order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_100 * D_200(dx, dy, dz, r_inv) + + m_a->M_010 * D_110(dx, dy, dz, r_inv) + + m_a->M_001 * D_101(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_100 * D_110(dx, dy, dz, r_inv) + + m_a->M_010 * D_020(dx, dy, dz, r_inv) + + m_a->M_001 * D_011(dx, dy, dz, r_inv); + l_b->F_001 += m_a->M_100 * D_101(dx, dy, dz, r_inv) + + m_a->M_010 * D_011(dx, dy, dz, r_inv) + + m_a->M_001 * D_002(dx, dy, dz, r_inv); + + /* 2nd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_000 * D_200(dx, dy, dz, r_inv); + l_b->F_020 += m_a->M_000 * D_020(dx, dy, dz, r_inv); + l_b->F_002 += m_a->M_000 * D_002(dx, dy, dz, r_inv); + l_b->F_110 += m_a->M_000 * D_110(dx, dy, dz, r_inv); + l_b->F_101 += m_a->M_000 * D_101(dx, dy, dz, r_inv); + l_b->F_011 += m_a->M_000 * D_011(dx, dy, dz, r_inv); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - /* 3rd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_300 * D_300(dx, dy, dz, r_inv) + - m_a->M_030 * D_030(dx, dy, dz, r_inv) + - m_a->M_003 * D_003(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_210 * D_210(dx, dy, dz, r_inv) + - m_a->M_201 * D_201(dx, dy, dz, r_inv) + - m_a->M_120 * D_120(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_021 * D_021(dx, dy, dz, r_inv) + - m_a->M_102 * D_102(dx, dy, dz, r_inv) + - m_a->M_012 * D_012(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_111 * D_111(dx, dy, dz, r_inv); - - /* 3rd order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_200 * D_300(dx, dy, dz, r_inv) + - m_a->M_020 * D_120(dx, dy, dz, r_inv) + - m_a->M_002 * D_102(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_110 * D_210(dx, dy, dz, r_inv) + - m_a->M_101 * D_201(dx, dy, dz, r_inv) + - m_a->M_011 * D_111(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_200 * D_210(dx, dy, dz, r_inv) + - m_a->M_020 * D_030(dx, dy, dz, r_inv) + - m_a->M_002 * D_012(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_110 * D_120(dx, dy, dz, r_inv) + - m_a->M_101 * D_111(dx, dy, dz, r_inv) + - m_a->M_011 * D_021(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_200 * D_201(dx, dy, dz, r_inv) + - m_a->M_020 * D_021(dx, dy, dz, r_inv) + - m_a->M_002 * D_003(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_110 * D_111(dx, dy, dz, r_inv) + - m_a->M_101 * D_102(dx, dy, dz, r_inv) + - m_a->M_011 * D_012(dx, dy, dz, r_inv); - - /* 3rd order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_100 * D_300(dx, dy, dz, r_inv) + - m_a->M_010 * D_210(dx, dy, dz, r_inv) + - m_a->M_001 * D_201(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_100 * D_120(dx, dy, dz, r_inv) + - m_a->M_010 * D_030(dx, dy, dz, r_inv) + - m_a->M_001 * D_021(dx, dy, dz, r_inv); - l_b->F_002 += m_a->M_100 * D_102(dx, dy, dz, r_inv) + - m_a->M_010 * D_012(dx, dy, dz, r_inv) + - m_a->M_001 * D_003(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_100 * D_210(dx, dy, dz, r_inv) + - m_a->M_010 * D_120(dx, dy, dz, r_inv) + - m_a->M_001 * D_111(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_100 * D_201(dx, dy, dz, r_inv) + - m_a->M_010 * D_111(dx, dy, dz, r_inv) + - m_a->M_001 * D_102(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_100 * D_111(dx, dy, dz, r_inv) + - m_a->M_010 * D_021(dx, dy, dz, r_inv) + - m_a->M_001 * D_012(dx, dy, dz, r_inv); - - /* 3rd order multipole term (addition to rank 2)*/ - l_b->F_300 += m_a->M_000 * D_300(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_000 * D_030(dx, dy, dz, r_inv); - l_b->F_003 += m_a->M_000 * D_003(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_000 * D_210(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_000 * D_201(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_000 * D_120(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_000 * D_021(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_000 * D_102(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_000 * D_012(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_000 * D_111(dx, dy, dz, r_inv); + /* 3rd order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_300 * D_300(dx, dy, dz, r_inv) + + m_a->M_030 * D_030(dx, dy, dz, r_inv) + + m_a->M_003 * D_003(dx, dy, dz, r_inv); + l_b->F_000 += m_a->M_210 * D_210(dx, dy, dz, r_inv) + + m_a->M_201 * D_201(dx, dy, dz, r_inv) + + m_a->M_120 * D_120(dx, dy, dz, r_inv); + l_b->F_000 += m_a->M_021 * D_021(dx, dy, dz, r_inv) + + m_a->M_102 * D_102(dx, dy, dz, r_inv) + + m_a->M_012 * D_012(dx, dy, dz, r_inv); + l_b->F_000 += m_a->M_111 * D_111(dx, dy, dz, r_inv); + + /* 3rd order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_200 * D_300(dx, dy, dz, r_inv) + + m_a->M_020 * D_120(dx, dy, dz, r_inv) + + m_a->M_002 * D_102(dx, dy, dz, r_inv); + l_b->F_100 += m_a->M_110 * D_210(dx, dy, dz, r_inv) + + m_a->M_101 * D_201(dx, dy, dz, r_inv) + + m_a->M_011 * D_111(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_200 * D_210(dx, dy, dz, r_inv) + + m_a->M_020 * D_030(dx, dy, dz, r_inv) + + m_a->M_002 * D_012(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_110 * D_120(dx, dy, dz, r_inv) + + m_a->M_101 * D_111(dx, dy, dz, r_inv) + + m_a->M_011 * D_021(dx, dy, dz, r_inv); + l_b->F_001 += m_a->M_200 * D_201(dx, dy, dz, r_inv) + + m_a->M_020 * D_021(dx, dy, dz, r_inv) + + m_a->M_002 * D_003(dx, dy, dz, r_inv); + l_b->F_001 += m_a->M_110 * D_111(dx, dy, dz, r_inv) + + m_a->M_101 * D_102(dx, dy, dz, r_inv) + + m_a->M_011 * D_012(dx, dy, dz, r_inv); + + /* 3rd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_100 * D_300(dx, dy, dz, r_inv) + + m_a->M_010 * D_210(dx, dy, dz, r_inv) + + m_a->M_001 * D_201(dx, dy, dz, r_inv); + l_b->F_020 += m_a->M_100 * D_120(dx, dy, dz, r_inv) + + m_a->M_010 * D_030(dx, dy, dz, r_inv) + + m_a->M_001 * D_021(dx, dy, dz, r_inv); + l_b->F_002 += m_a->M_100 * D_102(dx, dy, dz, r_inv) + + m_a->M_010 * D_012(dx, dy, dz, r_inv) + + m_a->M_001 * D_003(dx, dy, dz, r_inv); + l_b->F_110 += m_a->M_100 * D_210(dx, dy, dz, r_inv) + + m_a->M_010 * D_120(dx, dy, dz, r_inv) + + m_a->M_001 * D_111(dx, dy, dz, r_inv); + l_b->F_101 += m_a->M_100 * D_201(dx, dy, dz, r_inv) + + m_a->M_010 * D_111(dx, dy, dz, r_inv) + + m_a->M_001 * D_102(dx, dy, dz, r_inv); + l_b->F_011 += m_a->M_100 * D_111(dx, dy, dz, r_inv) + + m_a->M_010 * D_021(dx, dy, dz, r_inv) + + m_a->M_001 * D_012(dx, dy, dz, r_inv); + + /* 3rd order multipole term (addition to rank 2)*/ + l_b->F_300 += m_a->M_000 * D_300(dx, dy, dz, r_inv); + l_b->F_030 += m_a->M_000 * D_030(dx, dy, dz, r_inv); + l_b->F_003 += m_a->M_000 * D_003(dx, dy, dz, r_inv); + l_b->F_210 += m_a->M_000 * D_210(dx, dy, dz, r_inv); + l_b->F_201 += m_a->M_000 * D_201(dx, dy, dz, r_inv); + l_b->F_120 += m_a->M_000 * D_120(dx, dy, dz, r_inv); + l_b->F_021 += m_a->M_000 * D_021(dx, dy, dz, r_inv); + l_b->F_102 += m_a->M_000 * D_102(dx, dy, dz, r_inv); + l_b->F_012 += m_a->M_000 * D_012(dx, dy, dz, r_inv); + l_b->F_111 += m_a->M_000 * D_111(dx, dy, dz, r_inv); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - /* Compute 4th order field tensor terms (addition to rank 0) */ - l_b->F_000 += m_a->M_004 * D_004(dx, dy, dz, r_inv) + - m_a->M_013 * D_013(dx, dy, dz, r_inv) + - m_a->M_022 * D_022(dx, dy, dz, r_inv) + - m_a->M_031 * D_031(dx, dy, dz, r_inv) + - m_a->M_040 * D_040(dx, dy, dz, r_inv) + - m_a->M_103 * D_103(dx, dy, dz, r_inv) + - m_a->M_112 * D_112(dx, dy, dz, r_inv) + - m_a->M_121 * D_121(dx, dy, dz, r_inv) + - m_a->M_130 * D_130(dx, dy, dz, r_inv) + - m_a->M_202 * D_202(dx, dy, dz, r_inv) + - m_a->M_211 * D_211(dx, dy, dz, r_inv) + - m_a->M_220 * D_220(dx, dy, dz, r_inv) + - m_a->M_301 * D_301(dx, dy, dz, r_inv) + - m_a->M_310 * D_310(dx, dy, dz, r_inv) + - m_a->M_400 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 1) */ - l_b->F_001 += m_a->M_003 * D_004(dx, dy, dz, r_inv) + - m_a->M_012 * D_013(dx, dy, dz, r_inv) + - m_a->M_021 * D_022(dx, dy, dz, r_inv) + - m_a->M_030 * D_031(dx, dy, dz, r_inv) + - m_a->M_102 * D_103(dx, dy, dz, r_inv) + - m_a->M_111 * D_112(dx, dy, dz, r_inv) + - m_a->M_120 * D_121(dx, dy, dz, r_inv) + - m_a->M_201 * D_202(dx, dy, dz, r_inv) + - m_a->M_210 * D_211(dx, dy, dz, r_inv) + - m_a->M_300 * D_301(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_003 * D_013(dx, dy, dz, r_inv) + - m_a->M_012 * D_022(dx, dy, dz, r_inv) + - m_a->M_021 * D_031(dx, dy, dz, r_inv) + - m_a->M_030 * D_040(dx, dy, dz, r_inv) + - m_a->M_102 * D_112(dx, dy, dz, r_inv) + - m_a->M_111 * D_121(dx, dy, dz, r_inv) + - m_a->M_120 * D_130(dx, dy, dz, r_inv) + - m_a->M_201 * D_211(dx, dy, dz, r_inv) + - m_a->M_210 * D_220(dx, dy, dz, r_inv) + - m_a->M_300 * D_310(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_003 * D_103(dx, dy, dz, r_inv) + - m_a->M_012 * D_112(dx, dy, dz, r_inv) + - m_a->M_021 * D_121(dx, dy, dz, r_inv) + - m_a->M_030 * D_130(dx, dy, dz, r_inv) + - m_a->M_102 * D_202(dx, dy, dz, r_inv) + - m_a->M_111 * D_211(dx, dy, dz, r_inv) + - m_a->M_120 * D_220(dx, dy, dz, r_inv) + - m_a->M_201 * D_301(dx, dy, dz, r_inv) + - m_a->M_210 * D_310(dx, dy, dz, r_inv) + - m_a->M_300 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_002 * D_004(dx, dy, dz, r_inv) + - m_a->M_011 * D_013(dx, dy, dz, r_inv) + - m_a->M_020 * D_022(dx, dy, dz, r_inv) + - m_a->M_101 * D_103(dx, dy, dz, r_inv) + - m_a->M_110 * D_112(dx, dy, dz, r_inv) + - m_a->M_200 * D_202(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_002 * D_013(dx, dy, dz, r_inv) + - m_a->M_011 * D_022(dx, dy, dz, r_inv) + - m_a->M_020 * D_031(dx, dy, dz, r_inv) + - m_a->M_101 * D_112(dx, dy, dz, r_inv) + - m_a->M_110 * D_121(dx, dy, dz, r_inv) + - m_a->M_200 * D_211(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_002 * D_022(dx, dy, dz, r_inv) + - m_a->M_011 * D_031(dx, dy, dz, r_inv) + - m_a->M_020 * D_040(dx, dy, dz, r_inv) + - m_a->M_101 * D_121(dx, dy, dz, r_inv) + - m_a->M_110 * D_130(dx, dy, dz, r_inv) + - m_a->M_200 * D_220(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_002 * D_103(dx, dy, dz, r_inv) + - m_a->M_011 * D_112(dx, dy, dz, r_inv) + - m_a->M_020 * D_121(dx, dy, dz, r_inv) + - m_a->M_101 * D_202(dx, dy, dz, r_inv) + - m_a->M_110 * D_211(dx, dy, dz, r_inv) + - m_a->M_200 * D_301(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_002 * D_112(dx, dy, dz, r_inv) + - m_a->M_011 * D_121(dx, dy, dz, r_inv) + - m_a->M_020 * D_130(dx, dy, dz, r_inv) + - m_a->M_101 * D_211(dx, dy, dz, r_inv) + - m_a->M_110 * D_220(dx, dy, dz, r_inv) + - m_a->M_200 * D_310(dx, dy, dz, r_inv); - l_b->F_200 += m_a->M_002 * D_202(dx, dy, dz, r_inv) + - m_a->M_011 * D_211(dx, dy, dz, r_inv) + - m_a->M_020 * D_220(dx, dy, dz, r_inv) + - m_a->M_101 * D_301(dx, dy, dz, r_inv) + - m_a->M_110 * D_310(dx, dy, dz, r_inv) + - m_a->M_200 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 3) */ - l_b->F_003 += m_a->M_001 * D_004(dx, dy, dz, r_inv) + - m_a->M_010 * D_013(dx, dy, dz, r_inv) + - m_a->M_100 * D_103(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_001 * D_013(dx, dy, dz, r_inv) + - m_a->M_010 * D_022(dx, dy, dz, r_inv) + - m_a->M_100 * D_112(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_001 * D_022(dx, dy, dz, r_inv) + - m_a->M_010 * D_031(dx, dy, dz, r_inv) + - m_a->M_100 * D_121(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_001 * D_031(dx, dy, dz, r_inv) + - m_a->M_010 * D_040(dx, dy, dz, r_inv) + - m_a->M_100 * D_130(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_001 * D_103(dx, dy, dz, r_inv) + - m_a->M_010 * D_112(dx, dy, dz, r_inv) + - m_a->M_100 * D_202(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_001 * D_112(dx, dy, dz, r_inv) + - m_a->M_010 * D_121(dx, dy, dz, r_inv) + - m_a->M_100 * D_211(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_001 * D_121(dx, dy, dz, r_inv) + - m_a->M_010 * D_130(dx, dy, dz, r_inv) + - m_a->M_100 * D_220(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_001 * D_202(dx, dy, dz, r_inv) + - m_a->M_010 * D_211(dx, dy, dz, r_inv) + - m_a->M_100 * D_301(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_001 * D_211(dx, dy, dz, r_inv) + - m_a->M_010 * D_220(dx, dy, dz, r_inv) + - m_a->M_100 * D_310(dx, dy, dz, r_inv); - l_b->F_300 += m_a->M_001 * D_301(dx, dy, dz, r_inv) + - m_a->M_010 * D_310(dx, dy, dz, r_inv) + - m_a->M_100 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 4) */ - l_b->F_004 += m_a->M_000 * D_004(dx, dy, dz, r_inv); - l_b->F_013 += m_a->M_000 * D_013(dx, dy, dz, r_inv); - l_b->F_022 += m_a->M_000 * D_022(dx, dy, dz, r_inv); - l_b->F_031 += m_a->M_000 * D_031(dx, dy, dz, r_inv); - l_b->F_040 += m_a->M_000 * D_040(dx, dy, dz, r_inv); - l_b->F_103 += m_a->M_000 * D_103(dx, dy, dz, r_inv); - l_b->F_112 += m_a->M_000 * D_112(dx, dy, dz, r_inv); - l_b->F_121 += m_a->M_000 * D_121(dx, dy, dz, r_inv); - l_b->F_130 += m_a->M_000 * D_130(dx, dy, dz, r_inv); - l_b->F_202 += m_a->M_000 * D_202(dx, dy, dz, r_inv); - l_b->F_211 += m_a->M_000 * D_211(dx, dy, dz, r_inv); - l_b->F_220 += m_a->M_000 * D_220(dx, dy, dz, r_inv); - l_b->F_301 += m_a->M_000 * D_301(dx, dy, dz, r_inv); - l_b->F_310 += m_a->M_000 * D_310(dx, dy, dz, r_inv); - l_b->F_400 += m_a->M_000 * D_400(dx, dy, dz, r_inv); + /* Compute 4th order field tensor terms (addition to rank 0) */ + l_b->F_000 += m_a->M_004 * D_004(dx, dy, dz, r_inv) + + m_a->M_013 * D_013(dx, dy, dz, r_inv) + + m_a->M_022 * D_022(dx, dy, dz, r_inv) + + m_a->M_031 * D_031(dx, dy, dz, r_inv) + + m_a->M_040 * D_040(dx, dy, dz, r_inv) + + m_a->M_103 * D_103(dx, dy, dz, r_inv) + + m_a->M_112 * D_112(dx, dy, dz, r_inv) + + m_a->M_121 * D_121(dx, dy, dz, r_inv) + + m_a->M_130 * D_130(dx, dy, dz, r_inv) + + m_a->M_202 * D_202(dx, dy, dz, r_inv) + + m_a->M_211 * D_211(dx, dy, dz, r_inv) + + m_a->M_220 * D_220(dx, dy, dz, r_inv) + + m_a->M_301 * D_301(dx, dy, dz, r_inv) + + m_a->M_310 * D_310(dx, dy, dz, r_inv) + + m_a->M_400 * D_400(dx, dy, dz, r_inv); + + /* Compute 4th order field tensor terms (addition to rank 1) */ + l_b->F_001 += m_a->M_003 * D_004(dx, dy, dz, r_inv) + + m_a->M_012 * D_013(dx, dy, dz, r_inv) + + m_a->M_021 * D_022(dx, dy, dz, r_inv) + + m_a->M_030 * D_031(dx, dy, dz, r_inv) + + m_a->M_102 * D_103(dx, dy, dz, r_inv) + + m_a->M_111 * D_112(dx, dy, dz, r_inv) + + m_a->M_120 * D_121(dx, dy, dz, r_inv) + + m_a->M_201 * D_202(dx, dy, dz, r_inv) + + m_a->M_210 * D_211(dx, dy, dz, r_inv) + + m_a->M_300 * D_301(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_003 * D_013(dx, dy, dz, r_inv) + + m_a->M_012 * D_022(dx, dy, dz, r_inv) + + m_a->M_021 * D_031(dx, dy, dz, r_inv) + + m_a->M_030 * D_040(dx, dy, dz, r_inv) + + m_a->M_102 * D_112(dx, dy, dz, r_inv) + + m_a->M_111 * D_121(dx, dy, dz, r_inv) + + m_a->M_120 * D_130(dx, dy, dz, r_inv) + + m_a->M_201 * D_211(dx, dy, dz, r_inv) + + m_a->M_210 * D_220(dx, dy, dz, r_inv) + + m_a->M_300 * D_310(dx, dy, dz, r_inv); + l_b->F_100 += m_a->M_003 * D_103(dx, dy, dz, r_inv) + + m_a->M_012 * D_112(dx, dy, dz, r_inv) + + m_a->M_021 * D_121(dx, dy, dz, r_inv) + + m_a->M_030 * D_130(dx, dy, dz, r_inv) + + m_a->M_102 * D_202(dx, dy, dz, r_inv) + + m_a->M_111 * D_211(dx, dy, dz, r_inv) + + m_a->M_120 * D_220(dx, dy, dz, r_inv) + + m_a->M_201 * D_301(dx, dy, dz, r_inv) + + m_a->M_210 * D_310(dx, dy, dz, r_inv) + + m_a->M_300 * D_400(dx, dy, dz, r_inv); + + /* Compute 4th order field tensor terms (addition to rank 2) */ + l_b->F_002 += m_a->M_002 * D_004(dx, dy, dz, r_inv) + + m_a->M_011 * D_013(dx, dy, dz, r_inv) + + m_a->M_020 * D_022(dx, dy, dz, r_inv) + + m_a->M_101 * D_103(dx, dy, dz, r_inv) + + m_a->M_110 * D_112(dx, dy, dz, r_inv) + + m_a->M_200 * D_202(dx, dy, dz, r_inv); + l_b->F_011 += m_a->M_002 * D_013(dx, dy, dz, r_inv) + + m_a->M_011 * D_022(dx, dy, dz, r_inv) + + m_a->M_020 * D_031(dx, dy, dz, r_inv) + + m_a->M_101 * D_112(dx, dy, dz, r_inv) + + m_a->M_110 * D_121(dx, dy, dz, r_inv) + + m_a->M_200 * D_211(dx, dy, dz, r_inv); + l_b->F_020 += m_a->M_002 * D_022(dx, dy, dz, r_inv) + + m_a->M_011 * D_031(dx, dy, dz, r_inv) + + m_a->M_020 * D_040(dx, dy, dz, r_inv) + + m_a->M_101 * D_121(dx, dy, dz, r_inv) + + m_a->M_110 * D_130(dx, dy, dz, r_inv) + + m_a->M_200 * D_220(dx, dy, dz, r_inv); + l_b->F_101 += m_a->M_002 * D_103(dx, dy, dz, r_inv) + + m_a->M_011 * D_112(dx, dy, dz, r_inv) + + m_a->M_020 * D_121(dx, dy, dz, r_inv) + + m_a->M_101 * D_202(dx, dy, dz, r_inv) + + m_a->M_110 * D_211(dx, dy, dz, r_inv) + + m_a->M_200 * D_301(dx, dy, dz, r_inv); + l_b->F_110 += m_a->M_002 * D_112(dx, dy, dz, r_inv) + + m_a->M_011 * D_121(dx, dy, dz, r_inv) + + m_a->M_020 * D_130(dx, dy, dz, r_inv) + + m_a->M_101 * D_211(dx, dy, dz, r_inv) + + m_a->M_110 * D_220(dx, dy, dz, r_inv) + + m_a->M_200 * D_310(dx, dy, dz, r_inv); + l_b->F_200 += m_a->M_002 * D_202(dx, dy, dz, r_inv) + + m_a->M_011 * D_211(dx, dy, dz, r_inv) + + m_a->M_020 * D_220(dx, dy, dz, r_inv) + + m_a->M_101 * D_301(dx, dy, dz, r_inv) + + m_a->M_110 * D_310(dx, dy, dz, r_inv) + + m_a->M_200 * D_400(dx, dy, dz, r_inv); + + /* Compute 4th order field tensor terms (addition to rank 3) */ + l_b->F_003 += m_a->M_001 * D_004(dx, dy, dz, r_inv) + + m_a->M_010 * D_013(dx, dy, dz, r_inv) + + m_a->M_100 * D_103(dx, dy, dz, r_inv); + l_b->F_012 += m_a->M_001 * D_013(dx, dy, dz, r_inv) + + m_a->M_010 * D_022(dx, dy, dz, r_inv) + + m_a->M_100 * D_112(dx, dy, dz, r_inv); + l_b->F_021 += m_a->M_001 * D_022(dx, dy, dz, r_inv) + + m_a->M_010 * D_031(dx, dy, dz, r_inv) + + m_a->M_100 * D_121(dx, dy, dz, r_inv); + l_b->F_030 += m_a->M_001 * D_031(dx, dy, dz, r_inv) + + m_a->M_010 * D_040(dx, dy, dz, r_inv) + + m_a->M_100 * D_130(dx, dy, dz, r_inv); + l_b->F_102 += m_a->M_001 * D_103(dx, dy, dz, r_inv) + + m_a->M_010 * D_112(dx, dy, dz, r_inv) + + m_a->M_100 * D_202(dx, dy, dz, r_inv); + l_b->F_111 += m_a->M_001 * D_112(dx, dy, dz, r_inv) + + m_a->M_010 * D_121(dx, dy, dz, r_inv) + + m_a->M_100 * D_211(dx, dy, dz, r_inv); + l_b->F_120 += m_a->M_001 * D_121(dx, dy, dz, r_inv) + + m_a->M_010 * D_130(dx, dy, dz, r_inv) + + m_a->M_100 * D_220(dx, dy, dz, r_inv); + l_b->F_201 += m_a->M_001 * D_202(dx, dy, dz, r_inv) + + m_a->M_010 * D_211(dx, dy, dz, r_inv) + + m_a->M_100 * D_301(dx, dy, dz, r_inv); + l_b->F_210 += m_a->M_001 * D_211(dx, dy, dz, r_inv) + + m_a->M_010 * D_220(dx, dy, dz, r_inv) + + m_a->M_100 * D_310(dx, dy, dz, r_inv); + l_b->F_300 += m_a->M_001 * D_301(dx, dy, dz, r_inv) + + m_a->M_010 * D_310(dx, dy, dz, r_inv) + + m_a->M_100 * D_400(dx, dy, dz, r_inv); + + /* Compute 4th order field tensor terms (addition to rank 4) */ + l_b->F_004 += m_a->M_000 * D_004(dx, dy, dz, r_inv); + l_b->F_013 += m_a->M_000 * D_013(dx, dy, dz, r_inv); + l_b->F_022 += m_a->M_000 * D_022(dx, dy, dz, r_inv); + l_b->F_031 += m_a->M_000 * D_031(dx, dy, dz, r_inv); + l_b->F_040 += m_a->M_000 * D_040(dx, dy, dz, r_inv); + l_b->F_103 += m_a->M_000 * D_103(dx, dy, dz, r_inv); + l_b->F_112 += m_a->M_000 * D_112(dx, dy, dz, r_inv); + l_b->F_121 += m_a->M_000 * D_121(dx, dy, dz, r_inv); + l_b->F_130 += m_a->M_000 * D_130(dx, dy, dz, r_inv); + l_b->F_202 += m_a->M_000 * D_202(dx, dy, dz, r_inv); + l_b->F_211 += m_a->M_000 * D_211(dx, dy, dz, r_inv); + l_b->F_220 += m_a->M_000 * D_220(dx, dy, dz, r_inv); + l_b->F_301 += m_a->M_000 * D_301(dx, dy, dz, r_inv); + l_b->F_310 += m_a->M_000 * D_310(dx, dy, dz, r_inv); + l_b->F_400 += m_a->M_000 * D_400(dx, dy, dz, r_inv); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - /* Compute 5th order field tensor terms (addition to rank 0) */ - l_b->F_000 += m_a->M_005 * D_005(dx, dy, dz, r_inv) + - m_a->M_014 * D_014(dx, dy, dz, r_inv) + - m_a->M_023 * D_023(dx, dy, dz, r_inv) + - m_a->M_032 * D_032(dx, dy, dz, r_inv) + - m_a->M_041 * D_041(dx, dy, dz, r_inv) + - m_a->M_050 * D_050(dx, dy, dz, r_inv) + - m_a->M_104 * D_104(dx, dy, dz, r_inv) + - m_a->M_113 * D_113(dx, dy, dz, r_inv) + - m_a->M_122 * D_122(dx, dy, dz, r_inv) + - m_a->M_131 * D_131(dx, dy, dz, r_inv) + - m_a->M_140 * D_140(dx, dy, dz, r_inv) + - m_a->M_203 * D_203(dx, dy, dz, r_inv) + - m_a->M_212 * D_212(dx, dy, dz, r_inv) + - m_a->M_221 * D_221(dx, dy, dz, r_inv) + - m_a->M_230 * D_230(dx, dy, dz, r_inv) + - m_a->M_302 * D_302(dx, dy, dz, r_inv) + - m_a->M_311 * D_311(dx, dy, dz, r_inv) + - m_a->M_320 * D_320(dx, dy, dz, r_inv) + - m_a->M_401 * D_401(dx, dy, dz, r_inv) + - m_a->M_410 * D_410(dx, dy, dz, r_inv) + - m_a->M_500 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 1) */ - l_b->F_001 += m_a->M_004 * D_005(dx, dy, dz, r_inv) + - m_a->M_013 * D_014(dx, dy, dz, r_inv) + - m_a->M_022 * D_023(dx, dy, dz, r_inv) + - m_a->M_031 * D_032(dx, dy, dz, r_inv) + - m_a->M_040 * D_041(dx, dy, dz, r_inv) + - m_a->M_103 * D_104(dx, dy, dz, r_inv) + - m_a->M_112 * D_113(dx, dy, dz, r_inv) + - m_a->M_121 * D_122(dx, dy, dz, r_inv) + - m_a->M_130 * D_131(dx, dy, dz, r_inv) + - m_a->M_202 * D_203(dx, dy, dz, r_inv) + - m_a->M_211 * D_212(dx, dy, dz, r_inv) + - m_a->M_220 * D_221(dx, dy, dz, r_inv) + - m_a->M_301 * D_302(dx, dy, dz, r_inv) + - m_a->M_310 * D_311(dx, dy, dz, r_inv) + - m_a->M_400 * D_401(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_004 * D_014(dx, dy, dz, r_inv) + - m_a->M_013 * D_023(dx, dy, dz, r_inv) + - m_a->M_022 * D_032(dx, dy, dz, r_inv) + - m_a->M_031 * D_041(dx, dy, dz, r_inv) + - m_a->M_040 * D_050(dx, dy, dz, r_inv) + - m_a->M_103 * D_113(dx, dy, dz, r_inv) + - m_a->M_112 * D_122(dx, dy, dz, r_inv) + - m_a->M_121 * D_131(dx, dy, dz, r_inv) + - m_a->M_130 * D_140(dx, dy, dz, r_inv) + - m_a->M_202 * D_212(dx, dy, dz, r_inv) + - m_a->M_211 * D_221(dx, dy, dz, r_inv) + - m_a->M_220 * D_230(dx, dy, dz, r_inv) + - m_a->M_301 * D_311(dx, dy, dz, r_inv) + - m_a->M_310 * D_320(dx, dy, dz, r_inv) + - m_a->M_400 * D_410(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_004 * D_104(dx, dy, dz, r_inv) + - m_a->M_013 * D_113(dx, dy, dz, r_inv) + - m_a->M_022 * D_122(dx, dy, dz, r_inv) + - m_a->M_031 * D_131(dx, dy, dz, r_inv) + - m_a->M_040 * D_140(dx, dy, dz, r_inv) + - m_a->M_103 * D_203(dx, dy, dz, r_inv) + - m_a->M_112 * D_212(dx, dy, dz, r_inv) + - m_a->M_121 * D_221(dx, dy, dz, r_inv) + - m_a->M_130 * D_230(dx, dy, dz, r_inv) + - m_a->M_202 * D_302(dx, dy, dz, r_inv) + - m_a->M_211 * D_311(dx, dy, dz, r_inv) + - m_a->M_220 * D_320(dx, dy, dz, r_inv) + - m_a->M_301 * D_401(dx, dy, dz, r_inv) + - m_a->M_310 * D_410(dx, dy, dz, r_inv) + - m_a->M_400 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_003 * D_005(dx, dy, dz, r_inv) + - m_a->M_012 * D_014(dx, dy, dz, r_inv) + - m_a->M_021 * D_023(dx, dy, dz, r_inv) + - m_a->M_030 * D_032(dx, dy, dz, r_inv) + - m_a->M_102 * D_104(dx, dy, dz, r_inv) + - m_a->M_111 * D_113(dx, dy, dz, r_inv) + - m_a->M_120 * D_122(dx, dy, dz, r_inv) + - m_a->M_201 * D_203(dx, dy, dz, r_inv) + - m_a->M_210 * D_212(dx, dy, dz, r_inv) + - m_a->M_300 * D_302(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_003 * D_014(dx, dy, dz, r_inv) + - m_a->M_012 * D_023(dx, dy, dz, r_inv) + - m_a->M_021 * D_032(dx, dy, dz, r_inv) + - m_a->M_030 * D_041(dx, dy, dz, r_inv) + - m_a->M_102 * D_113(dx, dy, dz, r_inv) + - m_a->M_111 * D_122(dx, dy, dz, r_inv) + - m_a->M_120 * D_131(dx, dy, dz, r_inv) + - m_a->M_201 * D_212(dx, dy, dz, r_inv) + - m_a->M_210 * D_221(dx, dy, dz, r_inv) + - m_a->M_300 * D_311(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_003 * D_023(dx, dy, dz, r_inv) + - m_a->M_012 * D_032(dx, dy, dz, r_inv) + - m_a->M_021 * D_041(dx, dy, dz, r_inv) + - m_a->M_030 * D_050(dx, dy, dz, r_inv) + - m_a->M_102 * D_122(dx, dy, dz, r_inv) + - m_a->M_111 * D_131(dx, dy, dz, r_inv) + - m_a->M_120 * D_140(dx, dy, dz, r_inv) + - m_a->M_201 * D_221(dx, dy, dz, r_inv) + - m_a->M_210 * D_230(dx, dy, dz, r_inv) + - m_a->M_300 * D_320(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_003 * D_104(dx, dy, dz, r_inv) + - m_a->M_012 * D_113(dx, dy, dz, r_inv) + - m_a->M_021 * D_122(dx, dy, dz, r_inv) + - m_a->M_030 * D_131(dx, dy, dz, r_inv) + - m_a->M_102 * D_203(dx, dy, dz, r_inv) + - m_a->M_111 * D_212(dx, dy, dz, r_inv) + - m_a->M_120 * D_221(dx, dy, dz, r_inv) + - m_a->M_201 * D_302(dx, dy, dz, r_inv) + - m_a->M_210 * D_311(dx, dy, dz, r_inv) + - m_a->M_300 * D_401(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_003 * D_113(dx, dy, dz, r_inv) + - m_a->M_012 * D_122(dx, dy, dz, r_inv) + - m_a->M_021 * D_131(dx, dy, dz, r_inv) + - m_a->M_030 * D_140(dx, dy, dz, r_inv) + - m_a->M_102 * D_212(dx, dy, dz, r_inv) + - m_a->M_111 * D_221(dx, dy, dz, r_inv) + - m_a->M_120 * D_230(dx, dy, dz, r_inv) + - m_a->M_201 * D_311(dx, dy, dz, r_inv) + - m_a->M_210 * D_320(dx, dy, dz, r_inv) + - m_a->M_300 * D_410(dx, dy, dz, r_inv); - l_b->F_200 += m_a->M_003 * D_203(dx, dy, dz, r_inv) + - m_a->M_012 * D_212(dx, dy, dz, r_inv) + - m_a->M_021 * D_221(dx, dy, dz, r_inv) + - m_a->M_030 * D_230(dx, dy, dz, r_inv) + - m_a->M_102 * D_302(dx, dy, dz, r_inv) + - m_a->M_111 * D_311(dx, dy, dz, r_inv) + - m_a->M_120 * D_320(dx, dy, dz, r_inv) + - m_a->M_201 * D_401(dx, dy, dz, r_inv) + - m_a->M_210 * D_410(dx, dy, dz, r_inv) + - m_a->M_300 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 3) */ - l_b->F_003 += m_a->M_002 * D_005(dx, dy, dz, r_inv) + - m_a->M_011 * D_014(dx, dy, dz, r_inv) + - m_a->M_020 * D_023(dx, dy, dz, r_inv) + - m_a->M_101 * D_104(dx, dy, dz, r_inv) + - m_a->M_110 * D_113(dx, dy, dz, r_inv) + - m_a->M_200 * D_203(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_002 * D_014(dx, dy, dz, r_inv) + - m_a->M_011 * D_023(dx, dy, dz, r_inv) + - m_a->M_020 * D_032(dx, dy, dz, r_inv) + - m_a->M_101 * D_113(dx, dy, dz, r_inv) + - m_a->M_110 * D_122(dx, dy, dz, r_inv) + - m_a->M_200 * D_212(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_002 * D_023(dx, dy, dz, r_inv) + - m_a->M_011 * D_032(dx, dy, dz, r_inv) + - m_a->M_020 * D_041(dx, dy, dz, r_inv) + - m_a->M_101 * D_122(dx, dy, dz, r_inv) + - m_a->M_110 * D_131(dx, dy, dz, r_inv) + - m_a->M_200 * D_221(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_002 * D_032(dx, dy, dz, r_inv) + - m_a->M_011 * D_041(dx, dy, dz, r_inv) + - m_a->M_020 * D_050(dx, dy, dz, r_inv) + - m_a->M_101 * D_131(dx, dy, dz, r_inv) + - m_a->M_110 * D_140(dx, dy, dz, r_inv) + - m_a->M_200 * D_230(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_002 * D_104(dx, dy, dz, r_inv) + - m_a->M_011 * D_113(dx, dy, dz, r_inv) + - m_a->M_020 * D_122(dx, dy, dz, r_inv) + - m_a->M_101 * D_203(dx, dy, dz, r_inv) + - m_a->M_110 * D_212(dx, dy, dz, r_inv) + - m_a->M_200 * D_302(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_002 * D_113(dx, dy, dz, r_inv) + - m_a->M_011 * D_122(dx, dy, dz, r_inv) + - m_a->M_020 * D_131(dx, dy, dz, r_inv) + - m_a->M_101 * D_212(dx, dy, dz, r_inv) + - m_a->M_110 * D_221(dx, dy, dz, r_inv) + - m_a->M_200 * D_311(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_002 * D_122(dx, dy, dz, r_inv) + - m_a->M_011 * D_131(dx, dy, dz, r_inv) + - m_a->M_020 * D_140(dx, dy, dz, r_inv) + - m_a->M_101 * D_221(dx, dy, dz, r_inv) + - m_a->M_110 * D_230(dx, dy, dz, r_inv) + - m_a->M_200 * D_320(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_002 * D_203(dx, dy, dz, r_inv) + - m_a->M_011 * D_212(dx, dy, dz, r_inv) + - m_a->M_020 * D_221(dx, dy, dz, r_inv) + - m_a->M_101 * D_302(dx, dy, dz, r_inv) + - m_a->M_110 * D_311(dx, dy, dz, r_inv) + - m_a->M_200 * D_401(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_002 * D_212(dx, dy, dz, r_inv) + - m_a->M_011 * D_221(dx, dy, dz, r_inv) + - m_a->M_020 * D_230(dx, dy, dz, r_inv) + - m_a->M_101 * D_311(dx, dy, dz, r_inv) + - m_a->M_110 * D_320(dx, dy, dz, r_inv) + - m_a->M_200 * D_410(dx, dy, dz, r_inv); - l_b->F_300 += m_a->M_002 * D_302(dx, dy, dz, r_inv) + - m_a->M_011 * D_311(dx, dy, dz, r_inv) + - m_a->M_020 * D_320(dx, dy, dz, r_inv) + - m_a->M_101 * D_401(dx, dy, dz, r_inv) + - m_a->M_110 * D_410(dx, dy, dz, r_inv) + - m_a->M_200 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 4) */ - l_b->F_004 += m_a->M_001 * D_005(dx, dy, dz, r_inv) + - m_a->M_010 * D_014(dx, dy, dz, r_inv) + - m_a->M_100 * D_104(dx, dy, dz, r_inv); - l_b->F_013 += m_a->M_001 * D_014(dx, dy, dz, r_inv) + - m_a->M_010 * D_023(dx, dy, dz, r_inv) + - m_a->M_100 * D_113(dx, dy, dz, r_inv); - l_b->F_022 += m_a->M_001 * D_023(dx, dy, dz, r_inv) + - m_a->M_010 * D_032(dx, dy, dz, r_inv) + - m_a->M_100 * D_122(dx, dy, dz, r_inv); - l_b->F_031 += m_a->M_001 * D_032(dx, dy, dz, r_inv) + - m_a->M_010 * D_041(dx, dy, dz, r_inv) + - m_a->M_100 * D_131(dx, dy, dz, r_inv); - l_b->F_040 += m_a->M_001 * D_041(dx, dy, dz, r_inv) + - m_a->M_010 * D_050(dx, dy, dz, r_inv) + - m_a->M_100 * D_140(dx, dy, dz, r_inv); - l_b->F_103 += m_a->M_001 * D_104(dx, dy, dz, r_inv) + - m_a->M_010 * D_113(dx, dy, dz, r_inv) + - m_a->M_100 * D_203(dx, dy, dz, r_inv); - l_b->F_112 += m_a->M_001 * D_113(dx, dy, dz, r_inv) + - m_a->M_010 * D_122(dx, dy, dz, r_inv) + - m_a->M_100 * D_212(dx, dy, dz, r_inv); - l_b->F_121 += m_a->M_001 * D_122(dx, dy, dz, r_inv) + - m_a->M_010 * D_131(dx, dy, dz, r_inv) + - m_a->M_100 * D_221(dx, dy, dz, r_inv); - l_b->F_130 += m_a->M_001 * D_131(dx, dy, dz, r_inv) + - m_a->M_010 * D_140(dx, dy, dz, r_inv) + - m_a->M_100 * D_230(dx, dy, dz, r_inv); - l_b->F_202 += m_a->M_001 * D_203(dx, dy, dz, r_inv) + - m_a->M_010 * D_212(dx, dy, dz, r_inv) + - m_a->M_100 * D_302(dx, dy, dz, r_inv); - l_b->F_211 += m_a->M_001 * D_212(dx, dy, dz, r_inv) + - m_a->M_010 * D_221(dx, dy, dz, r_inv) + - m_a->M_100 * D_311(dx, dy, dz, r_inv); - l_b->F_220 += m_a->M_001 * D_221(dx, dy, dz, r_inv) + - m_a->M_010 * D_230(dx, dy, dz, r_inv) + - m_a->M_100 * D_320(dx, dy, dz, r_inv); - l_b->F_301 += m_a->M_001 * D_302(dx, dy, dz, r_inv) + - m_a->M_010 * D_311(dx, dy, dz, r_inv) + - m_a->M_100 * D_401(dx, dy, dz, r_inv); - l_b->F_310 += m_a->M_001 * D_311(dx, dy, dz, r_inv) + - m_a->M_010 * D_320(dx, dy, dz, r_inv) + - m_a->M_100 * D_410(dx, dy, dz, r_inv); - l_b->F_400 += m_a->M_001 * D_401(dx, dy, dz, r_inv) + - m_a->M_010 * D_410(dx, dy, dz, r_inv) + - m_a->M_100 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 5) */ - l_b->F_005 += m_a->M_000 * D_005(dx, dy, dz, r_inv); - l_b->F_014 += m_a->M_000 * D_014(dx, dy, dz, r_inv); - l_b->F_023 += m_a->M_000 * D_023(dx, dy, dz, r_inv); - l_b->F_032 += m_a->M_000 * D_032(dx, dy, dz, r_inv); - l_b->F_041 += m_a->M_000 * D_041(dx, dy, dz, r_inv); - l_b->F_050 += m_a->M_000 * D_050(dx, dy, dz, r_inv); - l_b->F_104 += m_a->M_000 * D_104(dx, dy, dz, r_inv); - l_b->F_113 += m_a->M_000 * D_113(dx, dy, dz, r_inv); - l_b->F_122 += m_a->M_000 * D_122(dx, dy, dz, r_inv); - l_b->F_131 += m_a->M_000 * D_131(dx, dy, dz, r_inv); - l_b->F_140 += m_a->M_000 * D_140(dx, dy, dz, r_inv); - l_b->F_203 += m_a->M_000 * D_203(dx, dy, dz, r_inv); - l_b->F_212 += m_a->M_000 * D_212(dx, dy, dz, r_inv); - l_b->F_221 += m_a->M_000 * D_221(dx, dy, dz, r_inv); - l_b->F_230 += m_a->M_000 * D_230(dx, dy, dz, r_inv); - l_b->F_302 += m_a->M_000 * D_302(dx, dy, dz, r_inv); - l_b->F_311 += m_a->M_000 * D_311(dx, dy, dz, r_inv); - l_b->F_320 += m_a->M_000 * D_320(dx, dy, dz, r_inv); - l_b->F_401 += m_a->M_000 * D_401(dx, dy, dz, r_inv); - l_b->F_410 += m_a->M_000 * D_410(dx, dy, dz, r_inv); - l_b->F_500 += m_a->M_000 * D_500(dx, dy, dz, r_inv); + /* Compute 5th order field tensor terms (addition to rank 0) */ + l_b->F_000 += m_a->M_005 * D_005(dx, dy, dz, r_inv) + + m_a->M_014 * D_014(dx, dy, dz, r_inv) + + m_a->M_023 * D_023(dx, dy, dz, r_inv) + + m_a->M_032 * D_032(dx, dy, dz, r_inv) + + m_a->M_041 * D_041(dx, dy, dz, r_inv) + + m_a->M_050 * D_050(dx, dy, dz, r_inv) + + m_a->M_104 * D_104(dx, dy, dz, r_inv) + + m_a->M_113 * D_113(dx, dy, dz, r_inv) + + m_a->M_122 * D_122(dx, dy, dz, r_inv) + + m_a->M_131 * D_131(dx, dy, dz, r_inv) + + m_a->M_140 * D_140(dx, dy, dz, r_inv) + + m_a->M_203 * D_203(dx, dy, dz, r_inv) + + m_a->M_212 * D_212(dx, dy, dz, r_inv) + + m_a->M_221 * D_221(dx, dy, dz, r_inv) + + m_a->M_230 * D_230(dx, dy, dz, r_inv) + + m_a->M_302 * D_302(dx, dy, dz, r_inv) + + m_a->M_311 * D_311(dx, dy, dz, r_inv) + + m_a->M_320 * D_320(dx, dy, dz, r_inv) + + m_a->M_401 * D_401(dx, dy, dz, r_inv) + + m_a->M_410 * D_410(dx, dy, dz, r_inv) + + m_a->M_500 * D_500(dx, dy, dz, r_inv); + + /* Compute 5th order field tensor terms (addition to rank 1) */ + l_b->F_001 += m_a->M_004 * D_005(dx, dy, dz, r_inv) + + m_a->M_013 * D_014(dx, dy, dz, r_inv) + + m_a->M_022 * D_023(dx, dy, dz, r_inv) + + m_a->M_031 * D_032(dx, dy, dz, r_inv) + + m_a->M_040 * D_041(dx, dy, dz, r_inv) + + m_a->M_103 * D_104(dx, dy, dz, r_inv) + + m_a->M_112 * D_113(dx, dy, dz, r_inv) + + m_a->M_121 * D_122(dx, dy, dz, r_inv) + + m_a->M_130 * D_131(dx, dy, dz, r_inv) + + m_a->M_202 * D_203(dx, dy, dz, r_inv) + + m_a->M_211 * D_212(dx, dy, dz, r_inv) + + m_a->M_220 * D_221(dx, dy, dz, r_inv) + + m_a->M_301 * D_302(dx, dy, dz, r_inv) + + m_a->M_310 * D_311(dx, dy, dz, r_inv) + + m_a->M_400 * D_401(dx, dy, dz, r_inv); + l_b->F_010 += m_a->M_004 * D_014(dx, dy, dz, r_inv) + + m_a->M_013 * D_023(dx, dy, dz, r_inv) + + m_a->M_022 * D_032(dx, dy, dz, r_inv) + + m_a->M_031 * D_041(dx, dy, dz, r_inv) + + m_a->M_040 * D_050(dx, dy, dz, r_inv) + + m_a->M_103 * D_113(dx, dy, dz, r_inv) + + m_a->M_112 * D_122(dx, dy, dz, r_inv) + + m_a->M_121 * D_131(dx, dy, dz, r_inv) + + m_a->M_130 * D_140(dx, dy, dz, r_inv) + + m_a->M_202 * D_212(dx, dy, dz, r_inv) + + m_a->M_211 * D_221(dx, dy, dz, r_inv) + + m_a->M_220 * D_230(dx, dy, dz, r_inv) + + m_a->M_301 * D_311(dx, dy, dz, r_inv) + + m_a->M_310 * D_320(dx, dy, dz, r_inv) + + m_a->M_400 * D_410(dx, dy, dz, r_inv); + l_b->F_100 += m_a->M_004 * D_104(dx, dy, dz, r_inv) + + m_a->M_013 * D_113(dx, dy, dz, r_inv) + + m_a->M_022 * D_122(dx, dy, dz, r_inv) + + m_a->M_031 * D_131(dx, dy, dz, r_inv) + + m_a->M_040 * D_140(dx, dy, dz, r_inv) + + m_a->M_103 * D_203(dx, dy, dz, r_inv) + + m_a->M_112 * D_212(dx, dy, dz, r_inv) + + m_a->M_121 * D_221(dx, dy, dz, r_inv) + + m_a->M_130 * D_230(dx, dy, dz, r_inv) + + m_a->M_202 * D_302(dx, dy, dz, r_inv) + + m_a->M_211 * D_311(dx, dy, dz, r_inv) + + m_a->M_220 * D_320(dx, dy, dz, r_inv) + + m_a->M_301 * D_401(dx, dy, dz, r_inv) + + m_a->M_310 * D_410(dx, dy, dz, r_inv) + + m_a->M_400 * D_500(dx, dy, dz, r_inv); + + /* Compute 5th order field tensor terms (addition to rank 2) */ + l_b->F_002 += m_a->M_003 * D_005(dx, dy, dz, r_inv) + + m_a->M_012 * D_014(dx, dy, dz, r_inv) + + m_a->M_021 * D_023(dx, dy, dz, r_inv) + + m_a->M_030 * D_032(dx, dy, dz, r_inv) + + m_a->M_102 * D_104(dx, dy, dz, r_inv) + + m_a->M_111 * D_113(dx, dy, dz, r_inv) + + m_a->M_120 * D_122(dx, dy, dz, r_inv) + + m_a->M_201 * D_203(dx, dy, dz, r_inv) + + m_a->M_210 * D_212(dx, dy, dz, r_inv) + + m_a->M_300 * D_302(dx, dy, dz, r_inv); + l_b->F_011 += m_a->M_003 * D_014(dx, dy, dz, r_inv) + + m_a->M_012 * D_023(dx, dy, dz, r_inv) + + m_a->M_021 * D_032(dx, dy, dz, r_inv) + + m_a->M_030 * D_041(dx, dy, dz, r_inv) + + m_a->M_102 * D_113(dx, dy, dz, r_inv) + + m_a->M_111 * D_122(dx, dy, dz, r_inv) + + m_a->M_120 * D_131(dx, dy, dz, r_inv) + + m_a->M_201 * D_212(dx, dy, dz, r_inv) + + m_a->M_210 * D_221(dx, dy, dz, r_inv) + + m_a->M_300 * D_311(dx, dy, dz, r_inv); + l_b->F_020 += m_a->M_003 * D_023(dx, dy, dz, r_inv) + + m_a->M_012 * D_032(dx, dy, dz, r_inv) + + m_a->M_021 * D_041(dx, dy, dz, r_inv) + + m_a->M_030 * D_050(dx, dy, dz, r_inv) + + m_a->M_102 * D_122(dx, dy, dz, r_inv) + + m_a->M_111 * D_131(dx, dy, dz, r_inv) + + m_a->M_120 * D_140(dx, dy, dz, r_inv) + + m_a->M_201 * D_221(dx, dy, dz, r_inv) + + m_a->M_210 * D_230(dx, dy, dz, r_inv) + + m_a->M_300 * D_320(dx, dy, dz, r_inv); + l_b->F_101 += m_a->M_003 * D_104(dx, dy, dz, r_inv) + + m_a->M_012 * D_113(dx, dy, dz, r_inv) + + m_a->M_021 * D_122(dx, dy, dz, r_inv) + + m_a->M_030 * D_131(dx, dy, dz, r_inv) + + m_a->M_102 * D_203(dx, dy, dz, r_inv) + + m_a->M_111 * D_212(dx, dy, dz, r_inv) + + m_a->M_120 * D_221(dx, dy, dz, r_inv) + + m_a->M_201 * D_302(dx, dy, dz, r_inv) + + m_a->M_210 * D_311(dx, dy, dz, r_inv) + + m_a->M_300 * D_401(dx, dy, dz, r_inv); + l_b->F_110 += m_a->M_003 * D_113(dx, dy, dz, r_inv) + + m_a->M_012 * D_122(dx, dy, dz, r_inv) + + m_a->M_021 * D_131(dx, dy, dz, r_inv) + + m_a->M_030 * D_140(dx, dy, dz, r_inv) + + m_a->M_102 * D_212(dx, dy, dz, r_inv) + + m_a->M_111 * D_221(dx, dy, dz, r_inv) + + m_a->M_120 * D_230(dx, dy, dz, r_inv) + + m_a->M_201 * D_311(dx, dy, dz, r_inv) + + m_a->M_210 * D_320(dx, dy, dz, r_inv) + + m_a->M_300 * D_410(dx, dy, dz, r_inv); + l_b->F_200 += m_a->M_003 * D_203(dx, dy, dz, r_inv) + + m_a->M_012 * D_212(dx, dy, dz, r_inv) + + m_a->M_021 * D_221(dx, dy, dz, r_inv) + + m_a->M_030 * D_230(dx, dy, dz, r_inv) + + m_a->M_102 * D_302(dx, dy, dz, r_inv) + + m_a->M_111 * D_311(dx, dy, dz, r_inv) + + m_a->M_120 * D_320(dx, dy, dz, r_inv) + + m_a->M_201 * D_401(dx, dy, dz, r_inv) + + m_a->M_210 * D_410(dx, dy, dz, r_inv) + + m_a->M_300 * D_500(dx, dy, dz, r_inv); + + /* Compute 5th order field tensor terms (addition to rank 3) */ + l_b->F_003 += m_a->M_002 * D_005(dx, dy, dz, r_inv) + + m_a->M_011 * D_014(dx, dy, dz, r_inv) + + m_a->M_020 * D_023(dx, dy, dz, r_inv) + + m_a->M_101 * D_104(dx, dy, dz, r_inv) + + m_a->M_110 * D_113(dx, dy, dz, r_inv) + + m_a->M_200 * D_203(dx, dy, dz, r_inv); + l_b->F_012 += m_a->M_002 * D_014(dx, dy, dz, r_inv) + + m_a->M_011 * D_023(dx, dy, dz, r_inv) + + m_a->M_020 * D_032(dx, dy, dz, r_inv) + + m_a->M_101 * D_113(dx, dy, dz, r_inv) + + m_a->M_110 * D_122(dx, dy, dz, r_inv) + + m_a->M_200 * D_212(dx, dy, dz, r_inv); + l_b->F_021 += m_a->M_002 * D_023(dx, dy, dz, r_inv) + + m_a->M_011 * D_032(dx, dy, dz, r_inv) + + m_a->M_020 * D_041(dx, dy, dz, r_inv) + + m_a->M_101 * D_122(dx, dy, dz, r_inv) + + m_a->M_110 * D_131(dx, dy, dz, r_inv) + + m_a->M_200 * D_221(dx, dy, dz, r_inv); + l_b->F_030 += m_a->M_002 * D_032(dx, dy, dz, r_inv) + + m_a->M_011 * D_041(dx, dy, dz, r_inv) + + m_a->M_020 * D_050(dx, dy, dz, r_inv) + + m_a->M_101 * D_131(dx, dy, dz, r_inv) + + m_a->M_110 * D_140(dx, dy, dz, r_inv) + + m_a->M_200 * D_230(dx, dy, dz, r_inv); + l_b->F_102 += m_a->M_002 * D_104(dx, dy, dz, r_inv) + + m_a->M_011 * D_113(dx, dy, dz, r_inv) + + m_a->M_020 * D_122(dx, dy, dz, r_inv) + + m_a->M_101 * D_203(dx, dy, dz, r_inv) + + m_a->M_110 * D_212(dx, dy, dz, r_inv) + + m_a->M_200 * D_302(dx, dy, dz, r_inv); + l_b->F_111 += m_a->M_002 * D_113(dx, dy, dz, r_inv) + + m_a->M_011 * D_122(dx, dy, dz, r_inv) + + m_a->M_020 * D_131(dx, dy, dz, r_inv) + + m_a->M_101 * D_212(dx, dy, dz, r_inv) + + m_a->M_110 * D_221(dx, dy, dz, r_inv) + + m_a->M_200 * D_311(dx, dy, dz, r_inv); + l_b->F_120 += m_a->M_002 * D_122(dx, dy, dz, r_inv) + + m_a->M_011 * D_131(dx, dy, dz, r_inv) + + m_a->M_020 * D_140(dx, dy, dz, r_inv) + + m_a->M_101 * D_221(dx, dy, dz, r_inv) + + m_a->M_110 * D_230(dx, dy, dz, r_inv) + + m_a->M_200 * D_320(dx, dy, dz, r_inv); + l_b->F_201 += m_a->M_002 * D_203(dx, dy, dz, r_inv) + + m_a->M_011 * D_212(dx, dy, dz, r_inv) + + m_a->M_020 * D_221(dx, dy, dz, r_inv) + + m_a->M_101 * D_302(dx, dy, dz, r_inv) + + m_a->M_110 * D_311(dx, dy, dz, r_inv) + + m_a->M_200 * D_401(dx, dy, dz, r_inv); + l_b->F_210 += m_a->M_002 * D_212(dx, dy, dz, r_inv) + + m_a->M_011 * D_221(dx, dy, dz, r_inv) + + m_a->M_020 * D_230(dx, dy, dz, r_inv) + + m_a->M_101 * D_311(dx, dy, dz, r_inv) + + m_a->M_110 * D_320(dx, dy, dz, r_inv) + + m_a->M_200 * D_410(dx, dy, dz, r_inv); + l_b->F_300 += m_a->M_002 * D_302(dx, dy, dz, r_inv) + + m_a->M_011 * D_311(dx, dy, dz, r_inv) + + m_a->M_020 * D_320(dx, dy, dz, r_inv) + + m_a->M_101 * D_401(dx, dy, dz, r_inv) + + m_a->M_110 * D_410(dx, dy, dz, r_inv) + + m_a->M_200 * D_500(dx, dy, dz, r_inv); + + /* Compute 5th order field tensor terms (addition to rank 4) */ + l_b->F_004 += m_a->M_001 * D_005(dx, dy, dz, r_inv) + + m_a->M_010 * D_014(dx, dy, dz, r_inv) + + m_a->M_100 * D_104(dx, dy, dz, r_inv); + l_b->F_013 += m_a->M_001 * D_014(dx, dy, dz, r_inv) + + m_a->M_010 * D_023(dx, dy, dz, r_inv) + + m_a->M_100 * D_113(dx, dy, dz, r_inv); + l_b->F_022 += m_a->M_001 * D_023(dx, dy, dz, r_inv) + + m_a->M_010 * D_032(dx, dy, dz, r_inv) + + m_a->M_100 * D_122(dx, dy, dz, r_inv); + l_b->F_031 += m_a->M_001 * D_032(dx, dy, dz, r_inv) + + m_a->M_010 * D_041(dx, dy, dz, r_inv) + + m_a->M_100 * D_131(dx, dy, dz, r_inv); + l_b->F_040 += m_a->M_001 * D_041(dx, dy, dz, r_inv) + + m_a->M_010 * D_050(dx, dy, dz, r_inv) + + m_a->M_100 * D_140(dx, dy, dz, r_inv); + l_b->F_103 += m_a->M_001 * D_104(dx, dy, dz, r_inv) + + m_a->M_010 * D_113(dx, dy, dz, r_inv) + + m_a->M_100 * D_203(dx, dy, dz, r_inv); + l_b->F_112 += m_a->M_001 * D_113(dx, dy, dz, r_inv) + + m_a->M_010 * D_122(dx, dy, dz, r_inv) + + m_a->M_100 * D_212(dx, dy, dz, r_inv); + l_b->F_121 += m_a->M_001 * D_122(dx, dy, dz, r_inv) + + m_a->M_010 * D_131(dx, dy, dz, r_inv) + + m_a->M_100 * D_221(dx, dy, dz, r_inv); + l_b->F_130 += m_a->M_001 * D_131(dx, dy, dz, r_inv) + + m_a->M_010 * D_140(dx, dy, dz, r_inv) + + m_a->M_100 * D_230(dx, dy, dz, r_inv); + l_b->F_202 += m_a->M_001 * D_203(dx, dy, dz, r_inv) + + m_a->M_010 * D_212(dx, dy, dz, r_inv) + + m_a->M_100 * D_302(dx, dy, dz, r_inv); + l_b->F_211 += m_a->M_001 * D_212(dx, dy, dz, r_inv) + + m_a->M_010 * D_221(dx, dy, dz, r_inv) + + m_a->M_100 * D_311(dx, dy, dz, r_inv); + l_b->F_220 += m_a->M_001 * D_221(dx, dy, dz, r_inv) + + m_a->M_010 * D_230(dx, dy, dz, r_inv) + + m_a->M_100 * D_320(dx, dy, dz, r_inv); + l_b->F_301 += m_a->M_001 * D_302(dx, dy, dz, r_inv) + + m_a->M_010 * D_311(dx, dy, dz, r_inv) + + m_a->M_100 * D_401(dx, dy, dz, r_inv); + l_b->F_310 += m_a->M_001 * D_311(dx, dy, dz, r_inv) + + m_a->M_010 * D_320(dx, dy, dz, r_inv) + + m_a->M_100 * D_410(dx, dy, dz, r_inv); + l_b->F_400 += m_a->M_001 * D_401(dx, dy, dz, r_inv) + + m_a->M_010 * D_410(dx, dy, dz, r_inv) + + m_a->M_100 * D_500(dx, dy, dz, r_inv); + + /* Compute 5th order field tensor terms (addition to rank 5) */ + l_b->F_005 += m_a->M_000 * D_005(dx, dy, dz, r_inv); + l_b->F_014 += m_a->M_000 * D_014(dx, dy, dz, r_inv); + l_b->F_023 += m_a->M_000 * D_023(dx, dy, dz, r_inv); + l_b->F_032 += m_a->M_000 * D_032(dx, dy, dz, r_inv); + l_b->F_041 += m_a->M_000 * D_041(dx, dy, dz, r_inv); + l_b->F_050 += m_a->M_000 * D_050(dx, dy, dz, r_inv); + l_b->F_104 += m_a->M_000 * D_104(dx, dy, dz, r_inv); + l_b->F_113 += m_a->M_000 * D_113(dx, dy, dz, r_inv); + l_b->F_122 += m_a->M_000 * D_122(dx, dy, dz, r_inv); + l_b->F_131 += m_a->M_000 * D_131(dx, dy, dz, r_inv); + l_b->F_140 += m_a->M_000 * D_140(dx, dy, dz, r_inv); + l_b->F_203 += m_a->M_000 * D_203(dx, dy, dz, r_inv); + l_b->F_212 += m_a->M_000 * D_212(dx, dy, dz, r_inv); + l_b->F_221 += m_a->M_000 * D_221(dx, dy, dz, r_inv); + l_b->F_230 += m_a->M_000 * D_230(dx, dy, dz, r_inv); + l_b->F_302 += m_a->M_000 * D_302(dx, dy, dz, r_inv); + l_b->F_311 += m_a->M_000 * D_311(dx, dy, dz, r_inv); + l_b->F_320 += m_a->M_000 * D_320(dx, dy, dz, r_inv); + l_b->F_401 += m_a->M_000 * D_401(dx, dy, dz, r_inv); + l_b->F_410 += m_a->M_000 * D_410(dx, dy, dz, r_inv); + l_b->F_500 += m_a->M_000 * D_500(dx, dy, dz, r_inv); #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for order >5" #endif -#ifdef SWIFT_DEBUG_CHECKS - - l_b->num_interacted += m_a->num_gpart; -#endif + /* Softened case */ + } else { + message("Un-supported softened M2L interaction."); + } } /** @@ -2470,4 +2514,29 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, gp->a_grav[2] += a_grav[2]; } +/** + * @brief Checks whether a cell-cell interaction can be appromixated by a M-M + * interaction. + * + * @param ma The #multipole of the first #cell. + * @param mb The #multipole of the second #cell. + * @param theta_crit_inv The inverse of the critical opening angle. + */ +__attribute__((always_inline)) INLINE static int gravity_multipole_accept( + const struct gravity_tensors *ma, const struct gravity_tensors *mb, + double theta_crit_inv) { + + const double r_crit_a = ma->r_max * theta_crit_inv; + const double r_crit_b = mb->r_max * theta_crit_inv; + + const double dx = ma->CoM[0] - mb->CoM[0]; + const double dy = ma->CoM[1] - mb->CoM[1]; + const double dz = ma->CoM[2] - mb->CoM[2]; + + const double r2 = dx * dx + dy * dy + dz * dz; + + /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ + return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b)); +} + #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index f938fc54d4f62a2e6360d85ed24ee6b659220c03..d41470f68d9a1050edd44c0dd46ba077c5dfa327 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -2028,6 +2028,7 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Should we even bother? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (ci->count == 0 || cj->count == 0) return; /* Get the cell dimensions. */ const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); @@ -2275,7 +2276,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active(ci, r->e)) return; + if (ci->count == 0 || !cell_is_active(ci, r->e)) return; /* Recurse? */ if (ci->split) { @@ -2329,6 +2330,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Should we even bother? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + if (ci->count == 0 || cj->count == 0) return; /* Get the cell dimensions. */ const float h = min(ci->width[0], min(ci->width[1], ci->width[2])); @@ -2576,7 +2578,7 @@ void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active(ci, r->e)) return; + if (ci->count == 0 || !cell_is_active(ci, r->e)) return; /* Recurse? */ if (ci->split) { diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index edd368cf8eb48e18119aaa184a8bb29336016483..065e99dba8ee08a6a4fb91245a9d53ffdc7caccc 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -23,6 +23,7 @@ /* Includes. */ #include "cell.h" #include "gravity.h" +#include "inline.h" #include "part.h" /** @@ -93,6 +94,7 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; const int periodic = e->s->periodic; const struct multipole *multi_j = &cj->multipole->m_pole; // const float a_smooth = e->gravity_properties->a_smooth; @@ -114,7 +116,7 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, /* Let's interact at this level */ gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM, - cj->multipole->CoM, periodic * 0); + cj->multipole->CoM, props, periodic * 0); TIMER_TOC(timer_dopair_grav_mm); } @@ -156,12 +158,6 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { TIMER_TIC; -#ifdef SWIFT_DEBUG_CHECKS - if (ci->width[0] != cj->width[0]) - error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0], - cj->width[0]); -#endif - /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; @@ -189,6 +185,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { } #endif + /* MATTHIEU: Should we use local DP accumulators ? */ + /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount_i; pid++) { @@ -288,6 +286,8 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { } #endif + /* MATTHIEU: Should we use local DP accumulators ? */ + /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount; pid++) { @@ -358,6 +358,11 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, int gettimer) { + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const double theta_crit_inv = props->theta_crit_inv; + #ifdef SWIFT_DEBUG_CHECKS const int gcount_i = ci->gcount; @@ -367,26 +372,8 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, if (gcount_i == 0 || gcount_j == 0) error("Doing pair gravity on an empty cell !"); - /* Bad stuff will happen if cell sizes are different */ - if (ci->width[0] != cj->width[0]) - error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0], - cj->width[0]); - /* Sanity check */ - if (ci == cj) - error( - "The impossible has happened: pair interaction between a cell and " - "itself."); - - /* Are the cells direct neighbours? */ - if (!cell_are_neighbours(ci, cj)) - error( - "Non-neighbouring cells ! ci->x=[%f %f %f] ci->width=%f cj->loc=[%f %f " - "%f] " - "cj->width=%f", - ci->loc[0], ci->loc[1], ci->loc[2], ci->width[0], cj->loc[0], - cj->loc[1], cj->loc[2], cj->width[0]); - + if (ci == cj) error("Pair interaction between a cell and itself."); #endif #if ICHECK > 0 @@ -415,34 +402,70 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, TIMER_TIC; - /* Are both cells split ? */ - if (ci->split && cj->split) { + /* Can we use M-M interactions ? */ + if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv)) { + /* MATTHIEU: make a symmetric M-M interaction function ! */ + runner_dopair_grav_mm(r, ci, cj); + runner_dopair_grav_mm(r, cj, ci); + } + /* We have two leaves. Go P-P. */ + else if (!ci->split && !cj->split) { + runner_dopair_grav_pp(r, ci, cj); + } + /* Alright, we'll have to split and recurse. */ + else { + + const double ri_max = ci->multipole->r_max; + const double rj_max = cj->multipole->r_max; - for (int j = 0; j < 8; j++) { - if (ci->progeny[j] != NULL) { + /* Split the larger of the two cells and start over again */ + if (ri_max > rj_max) { + + /* Can we actually split that interaction ? */ + if (ci->split) { + /* Loop over ci's children */ for (int k = 0; k < 8; k++) { - if (cj->progeny[k] != NULL) { + if (ci->progeny[k] != NULL) + runner_dopair_grav(r, ci->progeny[k], cj, 0); + } - if (cell_are_neighbours(ci->progeny[j], cj->progeny[k])) { + } else if (cj->split) { + /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ - /* Recurse */ - runner_dopair_grav(r, ci->progeny[j], cj->progeny[k], 0); + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_grav(r, ci, cj->progeny[k], 0); + } - } else { + } else { + error("Fundamental error in the logic"); + } + } else { - /* Ok, here we can go for multipole-multipole interactions */ - runner_dopair_grav_mm(r, ci->progeny[j], cj->progeny[k]); - runner_dopair_grav_mm(r, cj->progeny[k], ci->progeny[j]); - } - } + /* Can we actually split that interaction ? */ + if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + runner_dopair_grav(r, ci, cj->progeny[k], 0); + } + + } else if (ci->split) { + /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_grav(r, ci->progeny[k], cj, 0); } + + } else { + error("Fundamental error in the logic"); } } - } else { /* Not split */ - - /* Compute the interactions at this level directly. */ - runner_dopair_grav_pp(r, ci, cj); } if (gettimer) TIMER_TOC(timer_dosub_pair_grav); @@ -460,7 +483,6 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) { #ifdef SWIFT_DEBUG_CHECKS - /* Early abort? */ if (c->gcount == 0) error("Doing self gravity on an empty cell !"); #endif @@ -514,6 +536,14 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, } } +/** + * @brief Performs all M-M interactions between a given top-level cell and all + * the other top-levels that are far enough. + * + * @param r The thread #runner. + * @param ci The #cell of interest. + * @param timer Are we timing this ? + */ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { #if ICHECK > 0 @@ -529,39 +559,40 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { } #endif + /* Some constants */ + const struct engine *e = r->e; + const struct gravity_props *props = e->gravity_properties; + const double theta_crit_inv = props->theta_crit_inv; + TIMER_TIC; /* Recover the list of top-level cells */ - const struct engine *e = r->e; struct cell *cells = e->s->cells_top; const int nr_cells = e->s->nr_cells; - /* const double max_d = */ - /* const_gravity_a_smooth * const_gravity_r_cut * ci->width[0]; */ - /* const double max_d2 = max_d * max_d; */ - // const double pos_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; /* Anything to do here? */ - if (!cell_is_active(ci, e)) return; + if (!cell_is_active(ci, e)) return; // MATTHIEU (should never happen) - /* Drift our own multipole if need be */ - if (ci->ti_old_multipole != e->ti_current) cell_drift_multipole(ci, e); + /* Check multipole has been drifted */ + if (ci->ti_old_multipole != e->ti_current) + error("Interacting un-drifted multipole"); - /* Loop over all the cells and go for a p-m interaction if far enough but not - * too far */ + /* Loop over all the top-level cells and go for a M-M interaction if + * well-separated */ for (int i = 0; i < nr_cells; ++i) { + /* Handle on the top-level cell */ struct cell *cj = &cells[i]; - if (ci == cj) continue; - if (cj->gcount == 0) continue; + /* Avoid stupid cases */ + if (ci == cj || cj->gcount == 0) continue; - /* const double dx[3] = {cj->loc[0] - pos_i[0], // x */ - /* cj->loc[1] - pos_i[1], // y */ - /* cj->loc[2] - pos_i[2]}; // z */ - /* const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; */ - /* if (r2 > max_d2) continue; */ - - if (!cell_are_neighbours(ci, cj)) runner_dopair_grav_mm(r, ci, cj); + /* Check the multipole acceptance criterion */ + if (gravity_multipole_accept(ci->multipole, cj->multipole, + theta_crit_inv)) { + /* Go for a (non-symmetric) M-M calculation */ + runner_dopair_grav_mm(r, ci, cj); + } } if (timer) TIMER_TOC(timer_dograv_long_range); diff --git a/src/scheduler.c b/src/scheduler.c index 4d9ab07f6ecac25d0756b20ef761f7d01e2efa92..298dcc2b3d3cb36457d5b76853fbbea12392beed 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -163,10 +163,10 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { } /* Is this cell even split? */ - if (ci->split && ci->h_max * kernel_gamma * space_stretch < hi / 2) { + if (ci->split && 2.f * kernel_gamma * ci->h_max * space_stretch < hi) { /* Make a sub? */ - if (scheduler_dosub && + if (scheduler_dosub && /* Note division here to avoid overflow */ ((ci->count > 0 && ci->count < space_subsize / ci->count) || (ci->gcount > 0 && ci->gcount < space_subsize / ci->gcount))) { @@ -240,8 +240,8 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Should this task be split-up? */ if (ci->split && cj->split && - ci->h_max * kernel_gamma * space_stretch < hi / 2 && - cj->h_max * kernel_gamma * space_stretch < hj / 2) { + 2.f * kernel_gamma * space_stretch * ci->h_max < hi && + 2.f * kernel_gamma * space_stretch * cj->h_max < hj) { /* Replace by a single sub-task? */ if (scheduler_dosub && @@ -773,18 +773,34 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, void scheduler_set_unlocks(struct scheduler *s) { /* Store the counts for each task. */ - int *counts; - if ((counts = (int *)malloc(sizeof(int) * s->nr_tasks)) == NULL) + short int *counts; + if ((counts = (short int *)malloc(sizeof(short int) * s->nr_tasks)) == NULL) error("Failed to allocate temporary counts array."); - bzero(counts, sizeof(int) * s->nr_tasks); - for (int k = 0; k < s->nr_unlocks; k++) counts[s->unlock_ind[k]] += 1; + bzero(counts, sizeof(short int) * s->nr_tasks); + for (int k = 0; k < s->nr_unlocks; k++) { + counts[s->unlock_ind[k]] += 1; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we are not overflowing */ + if (counts[s->unlock_ind[k]] < 0) + error("Task unlocking more than %d other tasks!", + (1 << (sizeof(short int) - 1)) - 1); +#endif + } /* Compute the offset for each unlock block. */ int *offsets; if ((offsets = (int *)malloc(sizeof(int) * (s->nr_tasks + 1))) == NULL) error("Failed to allocate temporary offsets array."); offsets[0] = 0; - for (int k = 0; k < s->nr_tasks; k++) offsets[k + 1] = offsets[k] + counts[k]; + for (int k = 0; k < s->nr_tasks; k++) { + offsets[k + 1] = offsets[k] + counts[k]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we are not overflowing */ + if (offsets[k + 1] < 0) error("Task unlock offset array overflowing"); +#endif + } /* Create and fill a temporary array with the sorted unlocks. */ struct task **unlocks; @@ -1054,10 +1070,17 @@ void scheduler_rewait_mapper(void *map_data, int num_elements, /* Increment the task's own wait counter for the enqueueing. */ atomic_inc(&t->wait); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we don't have more waits that what can be stored. */ + if (t->wait < 0) + error("Task unlocked by more than %d tasks!", + (1 << (8 * sizeof(t->wait) - 1)) - 1); + /* Skip sort tasks that have already been performed */ if (t->type == task_type_sort && t->flags == 0) { error("Empty sort task encountered."); } +#endif /* Sets the waits of the dependances */ for (int k = 0; k < t->nr_unlock_tasks; k++) { diff --git a/src/space.c b/src/space.c index a8bb460b956baa0a7d3864c1c95aeb3db613bc4c..e907949f305b507f681c33beaedbeb90fc966475 100644 --- a/src/space.c +++ b/src/space.c @@ -2084,16 +2084,39 @@ void space_split_recursive(struct space *s, struct cell *c, /* Now shift progeny multipoles and add them up */ struct multipole temp; + double r_max = 0.; for (int k = 0; k < 8; ++k) { if (c->progeny[k] != NULL) { const struct cell *cp = c->progeny[k]; const struct multipole *m = &cp->multipole->m_pole; + + /* Contribution to multipole */ gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM, s->periodic); gravity_multipole_add(&c->multipole->m_pole, &temp); + + /* Upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] - cp->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] - cp->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] - cp->multipole->CoM[2]; + const double r2 = dx * dx + dy * dy + dz * dz; + r_max = max(r_max, cp->multipole->r_max + sqrt(r2)); } } - } + /* Alternative upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + + /* Take minimum of both limits */ + c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + } /* Deal with gravity */ } /* Otherwise, collect the data for this cell. */ @@ -2151,7 +2174,27 @@ void space_split_recursive(struct space *s, struct cell *c, ti_beg_max = get_integer_time_begin(e->ti_current + 1, time_bin_max); /* Construct the multipole and the centre of mass*/ - if (s->gravity) gravity_P2M(c->multipole, c->gparts, c->gcount); + if (s->gravity) { + if (gcount > 0) { + gravity_P2M(c->multipole, c->gparts, c->gcount); + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); + } else { + gravity_multipole_init(&c->multipole->m_pole); + c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; + c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; + c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; + c->multipole->r_max = 0.; + } + } } /* Set the values for this cell. */ @@ -2820,11 +2863,26 @@ void space_link_cleanup(struct space *s) { * * @param s The #space to check. * @param ti_drift The (integer) time. + * @param multipole Are we also checking the multipoles ? */ -void space_check_drift_point(struct space *s, integertime_t ti_drift) { +void space_check_drift_point(struct space *s, integertime_t ti_drift, + int multipole) { #ifdef SWIFT_DEBUG_CHECKS /* Recursively check all cells */ - space_map_cells_pre(s, 1, cell_check_drift_point, &ti_drift); + space_map_cells_pre(s, 1, cell_check_particle_drift_point, &ti_drift); + if (multipole) + space_map_cells_pre(s, 1, cell_check_multipole_drift_point, &ti_drift); +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + +void space_check_top_multipoles_drift_point(struct space *s, + integertime_t ti_drift) { +#ifdef SWIFT_DEBUG_CHECKS + for (int i = 0; i < s->nr_cells; ++i) { + cell_check_multipole_drift_point(&s->cells_top[i], &ti_drift); + } #else error("Calling debugging code without debugging flag activated."); #endif diff --git a/src/space.h b/src/space.h index 974e0676b49eb543d200e6d28c2c46e99e63c6a5..c5f588563e5a9fb4b6cb73ac1446514f8149794f 100644 --- a/src/space.h +++ b/src/space.h @@ -213,7 +213,10 @@ void space_init_parts(struct space *s); void space_init_gparts(struct space *s); void space_init_sparts(struct space *s); void space_link_cleanup(struct space *s); -void space_check_drift_point(struct space *s, integertime_t ti_drift); +void space_check_drift_point(struct space *s, integertime_t ti_drift, + int multipole); +void space_check_top_multipoles_drift_point(struct space *s, + integertime_t ti_drift); void space_check_timesteps(struct space *s); void space_replicate(struct space *s, int replicate, int verbose); void space_reset_task_counters(struct space *s); diff --git a/src/timers.c b/src/timers.c index 9b6b32251d8efd2890ed3855b5e9b87c15978784..9ab1ae09c740028bee6eb44ebfd0466bb3ba9406 100644 --- a/src/timers.c +++ b/src/timers.c @@ -83,12 +83,9 @@ char *timers_names[timer_count] = { * * To reset all timers, use the mask #timers_mask_all. */ - void timers_reset(unsigned long long mask) { - int k; - /* Loop over the timers and set the masked ones to zero. */ - for (k = 0; k < timer_count; k++) + for (int k = 0; k < timer_count; k++) if (mask & (1ull << k)) timers[k] = 0; } diff --git a/src/timers.h b/src/timers.h index 9678221d43987456b72df5dfbd2759547cd12db7..d4bfedaa0a11a02b8244ab78645a9502226ef6da 100644 --- a/src/timers.h +++ b/src/timers.h @@ -27,9 +27,12 @@ #include "cycle.h" #include "inline.h" -/* The timers themselves. - If you modify this list, be sure to change timers_names in timers.c as - well! */ +/** + * @brief The timers themselves. + * + * If you modify this list, be sure to change timers_names in timers.c as + * well! + **/ enum { timer_none = 0, timer_prepare, diff --git a/src/vector_power.h b/src/vector_power.h index 4f93ece3c2c82281faeec37d45f5166034b4a946..f2fd4381751706f6fca1e24daa131d8fd8a0eeba 100644 --- a/src/vector_power.h +++ b/src/vector_power.h @@ -307,7 +307,7 @@ __attribute__((always_inline)) INLINE static double X_310(const double v[3]) { */ __attribute__((always_inline)) INLINE static double X_301(const double v[3]) { - return 0.1666666666666667 * v[0] * v[0] * v[0] * v[1]; + return 0.1666666666666667 * v[0] * v[0] * v[0] * v[2]; } /** diff --git a/theory/Multipoles/bibliography.bib b/theory/Multipoles/bibliography.bib new file mode 100644 index 0000000000000000000000000000000000000000..abdb5855f52aefd9b4562851946d16bd402db68a --- /dev/null +++ b/theory/Multipoles/bibliography.bib @@ -0,0 +1,27 @@ +@ARTICLE{Dehnen2014, + author = {{Dehnen}, W.}, + title = "{A fast multipole method for stellar dynamics}", + journal = {Computational Astrophysics and Cosmology}, +archivePrefix = "arXiv", + eprint = {1405.2255}, + primaryClass = "astro-ph.IM", + year = 2014, + month = sep, + volume = 1, + eid = {1}, + pages = {1}, + doi = {10.1186/s40668-014-0001-7}, + adsurl = {http://adsabs.harvard.edu/abs/2014ComAC...1....1D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@article{Hardy2006, + title={Combinatorics of partial derivatives}, + author={Hardy, Michael}, + journal={Electron. J. Combin}, + volume={13}, + number={1}, + pages={13}, + year={2006} + } + diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex new file mode 100644 index 0000000000000000000000000000000000000000..6948708622dbf0cfc05daa9d63917f7a3017bfc2 --- /dev/null +++ b/theory/Multipoles/fmm_standalone.tex @@ -0,0 +1,22 @@ +\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras} +\usepackage{graphicx} +\usepackage{amsmath,paralist,xcolor,xspace,amssymb} +\usepackage{times} + +\newcommand{\swift}{{\sc Swift}\xspace} + +%opening +\title{FMM in SWIFT} +\author{Matthieu Schaller} + +\begin{document} + +\maketitle + +\input{gravity_derivatives} + +\bibliographystyle{mnras} +\bibliography{./bibliography.bib} + + +\end{document} diff --git a/theory/Multipoles/gravity_derivatives.tex b/theory/Multipoles/gravity_derivatives.tex new file mode 100644 index 0000000000000000000000000000000000000000..913bdf752c666aac5913c6e6452bdb06f3fcdc86 --- /dev/null +++ b/theory/Multipoles/gravity_derivatives.tex @@ -0,0 +1,54 @@ +We use the multi-index notation of \cite{Dehnen2014} to simplify expressions. + +\subsection{Derivatives of the gravitational potential} + +The calculation of all the +$D_\mathbf{n}(x,y,z) \equiv \nabla^{\mathbf{n}}\phi(x,y,z)$ terms up +to the relevent order can be quite tedious and it is beneficial to +automatize the whole setup. Ideally, one would like to have an +expression for each of this term that is only made of multiplications +and additions of each of the coordinates and the inverse distance. We +achieve this by writing $\phi$ as a composition of functions +$\phi(u(x,y,z))$ and apply the \textit{Fa\`a di Bruno} +formula \citep[i.e. the ``chain rule'' for higher order derivatives, +e.g.][]{Hardy2006} to construct our terms: + +\begin{equation} +\label{eq:faa_di_bruno} +\frac{\partial^n}{\partial x_1 \cdots \partial x_n} \phi(u) += \sum_{A} \phi^{(|A|)}(u) \prod_{B \in +A} \frac{\partial^{|B|}}{\prod_{c\in B}\partial x_c} u(x,y,z), +\end{equation} +where $A$ is the set of all partitions of $\lbrace1,\cdots, n\rbrace$, +$B$ is a block of a partition $A$ and $|\cdot|$ denotes the +cardinality of a set. For generic functions $\phi$ and $u$ this +formula yields an untracktable number of terms; an 8th-order +derivative will have $4140$ (!) terms in the sum\footnote{The number +of terms in the sum is given by the Bell number of the same order}. \\ +We choose to write +\begin{align} + \phi(x,y,z) &= 1 / \sqrt{u(x,y,z)}, \\ + u(x,y,z) &= x^2 + y^2 + z^2. +\end{align} +This choice allows to have derivatives of any order of $\phi(u)$ that +only depend on powers of $u$: + +\begin{equation} +f^{(n)}(u) = \frac{\Gamma(\frac{1}{2})}{\Gamma(\frac{1}{2} - +n)}\frac{1}{u^{n+\frac{1}{2}}}. +\end{equation} +More importantly, this choice of decomposition allows us to have +derivatives of $u$ only up to second order in $x$, $y$ or $z$. The +number of non-zero terms in eq. \ref{eq:faa_di_bruno} is hence +drastically reduced. For instance, when computing +$D_{(4,1,3)} \equiv \frac{\partial^8}{\partial x^4 \partial y \partial +z^3} \phi$, $4100$ of the $4140$ terms will involve at least one +zero-valued derivative (e.g. $\partial^3/\partial x^3$ or +$\partial^2/\partial x\partial y$) of $u$. Furthermore, among the 40 +remaining terms, many will involve the same derivatives and can be +grouped together, leaving us with a sum of six products of $x$,$y$ and +$z$. This is generally the case for most of the $D_\mathbf{n}$'s and +figuring out which terms are identical in a given set of partitions of +$\lbrace1,\cdots, n\rbrace$ is an interesting exercise in +combinatorics left for the reader \citep[see also][]{Hardy2006}. + diff --git a/theory/Multipoles/run.sh b/theory/Multipoles/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..34304a37dc1cb1fad3a7442544a8df793ea94d35 --- /dev/null +++ b/theory/Multipoles/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +python kernels.py +pdflatex -jobname=fmm fmm_standalone.tex +bibtex fmm.aux +pdflatex -jobname=fmm fmm_standalone.tex +pdflatex -jobname=fmm fmm_standalone.tex