diff --git a/examples/SedovBlast/makeIC.py b/examples/SedovBlast/makeIC.py index 5440df7b8f2b1d3d8b1ec748227c3cf2b8bb779a..1dd440dd9953d638f1fe0945326da8f4442cdf56 100644 --- a/examples/SedovBlast/makeIC.py +++ b/examples/SedovBlast/makeIC.py @@ -67,7 +67,7 @@ for i in range(L): v[index,1] = 0. v[index,2] = 0. m[index] = mass - h[index] = 2.251 * boxSize / L + h[index] = 2.251 / 2 * boxSize / L u[index] = internalEnergy ids[index] = index if sqrt((x - boxSize/2.)**2 + (y - boxSize/2.)**2 + (z - boxSize/2.)**2) < 2.01 * boxSize/L: diff --git a/examples/SedovBlast/rdf.py b/examples/SedovBlast/rdf.py new file mode 100644 index 0000000000000000000000000000000000000000..a8474939e2732f1b5291fc088f4a19f51d52ce5c --- /dev/null +++ b/examples/SedovBlast/rdf.py @@ -0,0 +1,120 @@ +############################################################################### + # This file is part of SWIFT. + # Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + # Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +import h5py +import random +import sys +import math +from numpy import * + +# Reads the HDF5 output of SWIFT and generates a radial density profile +# of the different physical values. + +# Input values? +if len(sys.argv) < 3 : + print "Useage: " , sys.argv[0] , " <filename> <nr. bins>" + exit() + +# Get the input arguments +fileName = sys.argv[1]; +nr_bins = int( sys.argv[2] ); + + +# Open the file +fileName = sys.argv[1]; +file = h5py.File( fileName , 'r' ) + +# Get the space dimensions. +grp = file[ "/Header" ] +boxsize = grp.attrs[ 'BoxSize' ] +boxsize = boxsize[0] + +# Get the particle data +grp = file.get( '/PartType0' ) +ds = grp.get( 'Coordinates' ) +coords = ds[...] +ds = grp.get( 'Velocities' ) +v = ds[...] +ds = grp.get( 'Masses' ) +m = ds[...] +ds = grp.get( 'SmoothingLength' ) +h = ds[...] +ds = grp.get( 'InternalEnergy' ) +u = ds[...] +ds = grp.get( 'ParticleIDs' ) +ids = ds[...] +ds = grp.get( 'Density' ) +rho = ds[...] + +# Set the origin to be the middle of the box +origin = [ boxsize / 2 , boxsize / 2 , boxsize / 2 ] + +# Get the maximum radius +r_max = boxsize / 2 + +# Init the bins +nr_parts = coords.shape[0] +bins_v = zeros( nr_bins ) +bins_m = zeros( nr_bins ) +bins_h = zeros( nr_bins ) +bins_u = zeros( nr_bins ) +bins_rho = zeros( nr_bins ) +bins_count = zeros( nr_bins ) +bins_P = zeros( nr_bins ) + +# Loop over the particles and fill the bins. +for i in range( nr_parts ): + + # Get the box index. + r = coords[i] - origin + r = sqrt( r[0]*r[0] + r[1]*r[1] + r[2]*r[2] ) + ind = floor( r / r_max * nr_bins ) + + # Update the bins + if ( ind < nr_bins ): + bins_count[ind] += 1 + bins_v[ind] += sqrt( v[i,0]*v[i,0] + v[i,1]*v[i,1] + v[i,2]*v[i,2] ) + bins_m[ind] += m[i] + bins_h[ind] += h[i] + bins_u[ind] += u[i] + bins_rho[ind] += rho[i] + bins_P[ind] += (2.0/3)*u[i]*rho[i] + +# Loop over the bins and dump them +print "# bucket left right count v m h u rho" +for i in range( nr_bins ): + + # Normalize by the bin volume. + r_left = r_max * i / nr_bins + r_right = r_max * (i+1) / nr_bins + vol = 4/3*math.pi*(r_right*r_right*r_right - r_left*r_left*r_left) + ivol = 1.0 / vol + + print "%i %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e" % \ + ( i , r_left , r_right , \ + bins_count[i] * ivol , \ + bins_v[i] / bins_count[i] , \ + bins_m[i] * ivol , \ + bins_h[i] / bins_count[i] , \ + bins_u[i] / bins_count[i] , \ + bins_rho[i] / bins_count[i] , + bins_P[i] / bins_count[i] ) + + diff --git a/examples/SedovBlast/solution.py b/examples/SedovBlast/solution.py index cd26aa4d408de58ac71c570cd131a017f13ec792..1b2dcf5957bc6684ebaa88d4ea08bee68e56cd6f 100644 --- a/examples/SedovBlast/solution.py +++ b/examples/SedovBlast/solution.py @@ -33,7 +33,7 @@ P_0 = 1.e-5 # Background Pressure E_0 = 1.e2 # Energy of the explosion gamma = 5./3. # Gas polytropic index -t = 0.15 # Time of the solution +t = 0.2 # Time of the solution N = 1000 # Number of radial points R_max = 3. # Maximal radius diff --git a/examples/SodShock/rhox.py b/examples/SodShock/rhox.py new file mode 100644 index 0000000000000000000000000000000000000000..36771c5c0d9dcdec99403249ea8a7bb1a4bc58c6 --- /dev/null +++ b/examples/SodShock/rhox.py @@ -0,0 +1,115 @@ +############################################################################### + # This file is part of SWIFT. + # Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + # Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +import h5py +import random +import sys +import math +from numpy import * + +# Reads the HDF5 output of SWIFT and generates a radial density profile +# of the different physical values. + +# Input values? +if len(sys.argv) < 3 : + print "Useage: " , sys.argv[0] , " <filename> <nr. bins>" + exit() + +# Get the input arguments +fileName = sys.argv[1]; +nr_bins = int( sys.argv[2] ); + + +# Open the file +fileName = sys.argv[1]; +file = h5py.File( fileName , 'r' ) + +# Get the space dimensions. +grp = file[ "/Header" ] +boxsize = grp.attrs[ 'BoxSize' ] +boxsize = boxsize[0] + +# Get the particle data +grp = file.get( '/PartType0' ) +ds = grp.get( 'Coordinates' ) +coords = ds[...] +ds = grp.get( 'Velocities' ) +v = ds[...] +# ds = grp.get( 'Mass' ) +# m = ds[...] +ds = grp.get( 'SmoothingLength' ) +h = ds[...] +ds = grp.get( 'InternalEnergy' ) +u = ds[...] +ds = grp.get( 'ParticleIDs' ) +ids = ds[...] +ds = grp.get( 'Density' ) +rho = ds[...] + +# Get the maximum radius +r_max = boxsize + +# Init the bins +nr_parts = coords.shape[0] +bins_v = zeros( nr_bins ) +bins_m = zeros( nr_bins ) +bins_h = zeros( nr_bins ) +bins_u = zeros( nr_bins ) +bins_rho = zeros( nr_bins ) +bins_count = zeros( nr_bins ) +bins_P = zeros( nr_bins ) + +# Loop over the particles and fill the bins. +for i in range( nr_parts ): + + # Get the box index. + r = coords[i,0] + ind = floor( r / r_max * nr_bins ) + + # Update the bins + bins_count[ind] += 1 + bins_v[ind] += v[i,0] # sqrt( v[i,0]*v[i,0] + v[i,1]*v[i,1] + v[i,2]*v[i,2] ) + # bins_m[ind] += m[i] + bins_h[ind] += h[i] + bins_u[ind] += u[i] + bins_rho[ind] += rho[i] + bins_P[ind] += (2.0/3)*u[i]*rho[i] + +# Loop over the bins and dump them +print "# bucket left right count v m h u rho" +for i in range( nr_bins ): + + # Normalize by the bin volume. + r_left = r_max * i / nr_bins + r_right = r_max * (i+1) / nr_bins + vol = 4/3*math.pi*(r_right*r_right*r_right - r_left*r_left*r_left) + ivol = 1.0 / vol + + print "%i %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e" % \ + ( i , r_left , r_right , \ + bins_count[i] * ivol , \ + bins_v[i] / bins_count[i] , \ + bins_m[i] * ivol , \ + bins_h[i] / bins_count[i] , \ + bins_u[i] / bins_count[i] , \ + bins_rho[i] / bins_count[i] , + bins_P[i] / bins_count[i] ) + + diff --git a/examples/test.c b/examples/test.c index bf9b05da642e54c536823236b8316c74842761fd..7155f233aaa4bde4ea1e5624bfb731e51d478ad3 100644 --- a/examples/test.c +++ b/examples/test.c @@ -646,7 +646,7 @@ void kernel_dump ( int N ) { // float dw_dx4[4] __attribute__ ((aligned (16))); for ( k = 0 ; k <= N ; k++ ) { - x = ((float)k) / N * kernel_igamma; + x = ((float)k) / N; x4[3] = x4[2]; x4[2] = x4[1]; x4[1] = x4[0]; x4[0] = x; kernel_deval( x , &w , &dw_dx ); // kernel_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 ); @@ -790,13 +790,13 @@ int main ( int argc , char *argv[] ) { /* Read particles and space information from (GADGET) IC */ tic = getticks(); - read_ic(ICfileName, dim, &parts, &N, &periodic); + read_ic( ICfileName , dim , &parts , &N , &periodic ); printf( "main: reading particle properties took %.3f ms.\n" , ((double)(getticks() - tic)) / CPU_TPS * 1000 ); fflush(stdout); /* Apply h scaling */ if(scaling != 1.0) for ( k = 0 ; k < N ; k++ ) - parts[k].h *= scaling; + parts[k].h *= scaling; /* Apply shift */ if(shift[0] !=0 || shift[1] !=0 || shift[2] !=0 ) diff --git a/examples/test_single.c b/examples/test_single.c index 895fd4cf31cfde621f9bc688ae7c91f808649918..3d1657e1fe481992d5cf9a6b24bfdda383bf4d85 100644 --- a/examples/test_single.c +++ b/examples/test_single.c @@ -64,15 +64,15 @@ int main ( int argc , char *argv[] ) { int k, N = 100; struct part p1, p2; - float r2, dx[3] = { 0.0f , 0.0f , 0.0f }; + float x, w, dwdx, r2, dx[3] = { 0.0f , 0.0f , 0.0f }; /* Init the particles. */ for ( k = 0 ; k < 3 ; k++ ) { p1.a[k] = 0.0f; p1.v[k] = 0.0f; p1.x[k] = 0.0; p2.a[k] = 0.0f; p2.v[k] = 0.0f; p2.x[k] = 0.0; } - p1.rho = 1.0f; p1.mass = 9.7059e-4; p1.h = 0.222871287; - p2.rho = 1.0f; p2.mass = 9.7059e-4; p2.h = 0.222871287; + p1.rho = 1.0f; p1.mass = 9.7059e-4; p1.h = 0.222871287 / 2; + p2.rho = 1.0f; p2.mass = 9.7059e-4; p2.h = 0.222871287 / 2; p1.force.c = 0.0f; p1.force.balsara = 0.0f; p2.force.c = 0.0f; p2.force.balsara = 0.0f; p1.u = 1.e-5 / ((const_gamma - 1.)*p1.rho); @@ -87,19 +87,32 @@ int main ( int argc , char *argv[] ) { for ( k = 1 ; k <= N ; k++ ) { /* Set the distance/radius. */ - dx[0] = -((float)k)/N * fmaxf( p1.h , p2.h ); + dx[0] = -((float)k)/N * fmaxf( p1.h , p2.h ) * kernel_gamma; r2 = dx[0]*dx[0]; /* Clear the particle fields. */ - p1.a[0] = 0.0f; p1.force.u_dt = 0.0f; - p2.a[0] = 0.0f; p2.force.u_dt = 0.0f; + /* p1.a[0] = 0.0f; p1.force.u_dt = 0.0f; + p2.a[0] = 0.0f; p2.force.u_dt = 0.0f; */ /* Interact the particles. */ - runner_iact_force( r2 , dx , p1.h , p2.h , &p1 , &p2 ); + // runner_iact_force( r2 , dx , p1.h , p2.h , &p1 , &p2 ); + + /* Clear the particle fields. */ + p1.rho = 0.0f; p1.density.wcount = 0.0f; + p2.rho = 0.0f; p2.density.wcount = 0.0f; + + /* Interact the particles. */ + runner_iact_density( r2 , dx , p1.h , p2.h , &p1 , &p2 ); + + /* Evaluate just the kernel. */ + x = fabsf( dx[0] ) / p1.h; + kernel_deval( x , &w , &dwdx ); /* Output the results. */ - printf( "%.3e %.3e %.3e %.3e %.3e\n" , - -dx[0] , p1.a[0] , p1.force.u_dt , p2.a[0] , p2.force.u_dt ); + printf( "%.3e %.3e %.3e %.3e %.3e %.3e %.3e\n" , + // -dx[0] , p1.a[0] , p1.force.u_dt , p2.a[0] , p2.force.u_dt , + -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount , + w , dwdx ); } /* loop over radii. */ diff --git a/src/const.h b/src/const.h index e2d7409006e96a26c064cbe5a761e68de773417e..98476d129fbfb9be7c1b48e041d38fa23010e417 100644 --- a/src/const.h +++ b/src/const.h @@ -21,7 +21,7 @@ /* Hydrodynamical constants. */ #define const_gamma (5.0f/3.0f) -#define const_viscosity_alpha 0.8 +#define const_viscosity_alpha 0.8f /* Time integration constants. */ #define const_cfl 0.3f diff --git a/src/engine.c b/src/engine.c index 686e14f1fd7fcef844d81936baf0b02d9839b174..451cb558c3242d5cce99ff9ccd2bc3c9c0786049 100644 --- a/src/engine.c +++ b/src/engine.c @@ -229,7 +229,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { (x[1] - x_old[1])*(x[1] - x_old[1]) + (x[2] - x_old[2])*(x[2] - x_old[2]) ); dx_max = fmaxf( dx_max , dx ); - h2dx_max = fmaxf( h2dx_max , dx*2 + h ); + h2dx_max = fmaxf( h2dx_max , dx*2 + h*kernel_gamma ); /* Update positions and energies at the half-step. */ p->v[0] = v[0] += dt * a[0]; @@ -362,12 +362,63 @@ void engine_collect_kick2 ( struct cell *c ) { * @brief Compute the force on a single particle brute-force. */ -void engine_single ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) { +void engine_single_density ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) { int i, k; double r2, dx[3]; - float fdx[3], ihg; - struct part p, p2; + float fdx[3], ih; + struct part p; + + /* Find "our" part. */ + for ( k = 0 ; k < N && parts[k].id != pid ; k++ ); + if ( k == N ) + error( "Part not found." ); + p = parts[k]; + + /* Clear accumulators. */ + ih = 1.0f / p.h; + p.rho = 0.0f; p.rho_dh = 0.0f; + p.density.wcount = 0.0f; p.density.wcount_dh = 0.0f; + p.density.div_v = 0.0; + for ( k=0 ; k < 3 ; k++) + p.density.curl_v[k] = 0.0; + + /* Loop over all particle pairs (force). */ + for ( k = 0 ; k < N ; k++ ) { + if ( parts[k].id == p.id ) + continue; + for ( i = 0 ; i < 3 ; i++ ) { + dx[i] = p.x[i] - parts[k].x[i]; + if ( periodic ) { + if ( dx[i] < -dim[i]/2 ) + dx[i] += dim[i]; + else if ( dx[i] > dim[i]/2 ) + dx[i] -= dim[i]; + } + fdx[i] = dx[i]; + } + r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2]; + if ( r2 < p.h*p.h*kernel_gamma2 ) { + runner_iact_nonsym_density( r2 , fdx , p.h , parts[k].h , &p , &parts[k] ); + } + } + + /* Dump the result. */ + p.rho = ih * ih * ih * ( p.rho + p.mass*kernel_root ); + p.rho_dh = p.rho_dh * ih * ih * ih * ih; + p.density.wcount = ( p.density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 ); + printf( "pairs_single: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh ); + fflush(stdout); + + } + + +void engine_single_force ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) { + + int i, k; + double r2, dx[3]; + float fdx[3]; + struct part p; /* Find "our" part. */ for ( k = 0 ; k < N && parts[k].id != pid ; k++ ); @@ -376,12 +427,12 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__ p = parts[k]; /* Clear accumulators. */ - ihg = kernel_igamma / p.h; p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f; p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f; /* Loop over all particle pairs (force). */ - for ( k = 0 ; k < N ; k++ ) { + // for ( k = 0 ; k < N ; k++ ) { + for ( k = N-1 ; k >= 0 ; k-- ) { if ( parts[k].id == p.id ) continue; for ( i = 0 ; i < 3 ; i++ ) { @@ -395,21 +446,19 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__ fdx[i] = dx[i]; } r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2]; - if ( r2 < p.h*p.h || r2 < parts[k].h*parts[k].h ) { + if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) { p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f; p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f; - p2 = parts[k]; - runner_iact_nonsym_force( r2 , fdx , p.h , p2.h , &p , &p2 ); - printf( "pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.\n" , + runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] ); + /* printf( "pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.\n" , pid , p2.id , p2.rho , sqrtf(r2) , p.a[0] , p.a[1] , p.a[2] , - p.force.u_dt , p.force.h_dt , p.force.v_sig ); + p.force.u_dt , p.force.h_dt , p.force.v_sig ); */ } } /* Dump the result. */ - ihg = kernel_igamma / p.h; - printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.density.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) ); + printf( "pairs_single: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt ); fflush(stdout); } @@ -472,6 +521,8 @@ void engine_step ( struct engine *e , int sort_queues ) { } } + // engine_single_density( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic ); + /* Start the clock. */ TIMER_TIC_ND @@ -488,11 +539,12 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Stop the clock. */ TIMER_TOC(timer_runners); + // engine_single_force( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // for(k=0; k<10; ++k) // printParticle(parts, k); - // engine_single( e->s->dim , 432626 , e->s->parts , e->s->nr_parts , e->s->periodic ); // printParticle( parts , 432626 ); - // printParticle( parts , 432628 ); + // printParticle( e->s->parts , 6178 , e->s->nr_parts ); /* Collect the cell data from the second kick. */ for ( k = 0 ; k < e->s->nr_cells ; k++ ) { diff --git a/src/kernel.h b/src/kernel.h index 09781ee080263eb6e69694cdff30a8afd54e5b0b..5996e182611ecaf985e3b7f05fad1d7e4bd1bc29 100644 --- a/src/kernel.h +++ b/src/kernel.h @@ -32,16 +32,15 @@ /* Coefficients for the kernel. */ #define kernel_degree 3 #define kernel_ivals 2 -#define kernel_gamma 0.5f -#define kernel_igamma 2.0f -#define kernel_igamma3 8.0f -#define kernel_igamma4 16.0f +#define kernel_gamma 2.0f +#define kernel_gamma2 4.0f +#define kernel_gamma3 8.0f static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] __attribute__ ((aligned (16))) = { 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI , -0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI , 0.0 , 0.0 , 0.0 , 0.0 }; #define kernel_root ( kernel_coeffs[ kernel_degree ] ) -#define kernel_wroot ( 4.0/3.0*M_PI*kernel_igamma3*kernel_coeffs[ kernel_degree ] ) +#define kernel_wroot ( 4.0/3.0*M_PI*kernel_coeffs[ kernel_degree ] ) /** diff --git a/src/runner.c b/src/runner.c index 2ee122e82c9cff49993500bb750983b0e4f1adb7..7e72ef0574b28edeab1ed31ad07dbb85c6761dd3 100644 --- a/src/runner.c +++ b/src/runner.c @@ -335,7 +335,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) { struct cell *finger; int i, k, redo, count = c->count; int *pid; - float h, hg, ihg, ihg2, ihg4, h_corr, rho, rho_dh, u, fc; + float h, ih, ih2, ih4, h_corr, rho, wcount, rho_dh, wcount_dh, u, fc; float normDiv_v, normCurl_v; float dt_step = r->e->dt_step; TIMER_TIC @@ -372,23 +372,21 @@ void runner_doghost ( struct runner *r , struct cell *c ) { /* Some smoothing length multiples. */ h = p->h; - hg = kernel_gamma * h; - ihg = 1.0f / hg; - ihg2 = ihg * ihg; - ihg4 = ihg2 * ihg2; - - /* Adjust the computed weighted number of neighbours. */ - p->density.wcount += kernel_wroot; + ih = 1.0f / h; + ih2 = ih * ih; + ih4 = ih2 * ih2; /* Final operation on the density. */ - p->rho = rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root ); - p->rho_dh = rho_dh = p->rho_dh * ihg4; + p->rho = rho = ih * ih2 * ( p->rho + p->mass*kernel_root ); + p->rho_dh = rho_dh = p->rho_dh * ih4; + wcount = ( p->density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 ); + wcount_dh = p->density.wcount_dh * ih * ( 4.0f / 3.0 * M_PI * kernel_gamma3 ); /* Compute the smoothing length update (Newton step). */ - if ( p->density.wcount_dh == 0.0f ) + if ( wcount_dh == 0.0f ) h_corr = p->h; else { - h_corr = ( const_nwneigh - p->density.wcount ) / p->density.wcount_dh; + h_corr = ( const_nwneigh - wcount ) / wcount_dh; /* Truncate to the range [ -p->h/2 , p->h ]. */ h_corr = fminf( h_corr , h ); @@ -400,9 +398,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) { cp->h = p->h; /* Did we get the right number density? */ - if ( p->density.wcount > const_nwneigh + const_delta_nwneigh || - p->density.wcount < const_nwneigh - const_delta_nwneigh ) { - // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->density.wcount ); fflush(stdout); + if ( wcount > const_nwneigh + const_delta_nwneigh || + wcount < const_nwneigh - const_delta_nwneigh ) { + // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , wcount ); fflush(stdout); // p->h += ( p->density.wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh; pid[redo] = pid[i]; redo += 1; @@ -417,8 +415,8 @@ void runner_doghost ( struct runner *r , struct cell *c ) { } /* Pre-compute some stuff for the balsara switch. */ - normDiv_v = fabs( p->density.div_v / rho * ihg4 ); - normCurl_v = sqrtf( p->density.curl_v[0] * p->density.curl_v[0] + p->density.curl_v[1] * p->density.curl_v[1] + p->density.curl_v[2] * p->density.curl_v[2] ) / rho * ihg4; + normDiv_v = fabs( p->density.div_v / rho * ih4 ); + normCurl_v = sqrtf( p->density.curl_v[0] * p->density.curl_v[0] + p->density.curl_v[1] * p->density.curl_v[1] + p->density.curl_v[2] * p->density.curl_v[2] ) / rho * ih4; /* As of here, particle force variables will be set. Do _NOT_ try to read any particle density variables! */ @@ -428,10 +426,10 @@ void runner_doghost ( struct runner *r , struct cell *c ) { p->force.c = fc = sqrtf( const_gamma * ( const_gamma - 1.0f ) * u ); /* Compute the P/Omega/rho2. */ - p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + hg * rho_dh / 3.0f ); + p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * rho_dh / 3.0f ); - /* Balsara switch */ - p->force.balsara = normDiv_v / ( normDiv_v + normCurl_v + 0.0001f * fc * ihg ); + /* Balsara switch */ + p->force.balsara = normDiv_v / ( normDiv_v + normCurl_v + 0.0001f * fc * ih ); /* Reset the acceleration. */ for ( k = 0 ; k < 3 ; k++ ) @@ -543,7 +541,7 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { } /* Update the particle's time step. */ - p->dt = pdt = const_cfl * kernel_gamma * h / ( p->force.v_sig ); + p->dt = pdt = const_cfl * h / p->force.v_sig; /* Update positions and energies at the half-step. */ p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0]; @@ -565,9 +563,9 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { mom[2] += m * p->v[2]; /* Collect angular momentum */ - ang[0] += m * ( x[1]*v[2] - v[2]*x[1] ); - ang[1] += m * ( x[2]*v[0] - v[0]*x[2] ); - ang[2] += m * ( x[0]*v[1] - v[1]*x[0] ); + ang[0] += m * ( x[1]*v[2] - x[2]*v[1] ); + ang[1] += m * ( x[2]*v[0] - x[0]*v[2] ); + ang[2] += m * ( x[0]*v[1] - x[1]*v[0] ); /* Collect entropic function */ // lent += u * pow( p->rho, 1.f-const_gamma ); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index dd304a8bb93094ea6692168e5f61c399ba036fda..9efb74f364288cb07edc200b309346ca27cab99e 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -105,7 +105,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r struct cpart *restrict cpj, *restrict cparts_j = cj->cparts; struct cpart *restrict cpi, *restrict cparts_i = ci->cparts; double pix[3]; - float dx[3], hi, hi2, r2; + float dx[3], hi, hig2, r2; float dt_step = e->dt_step; #ifdef VECTORIZE int icount = 0; @@ -142,7 +142,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k] - shift[k]; hi = cpi->h; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_j ; pjd++ ) { @@ -158,7 +158,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r } /* Hit or miss? */ - if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { + if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -214,7 +214,7 @@ void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) { struct part *restrict parts = c->parts; struct cpart *restrict cpi, *restrict cpj,*restrict cparts = c->cparts; double pix[3]; - float dx[3], hi, hi2, r2; + float dx[3], hi, hig2, r2; float dt_step = r->e->dt_step; #ifdef VECTORIZE int icount = 0; @@ -243,7 +243,7 @@ void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) { for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k]; hi = cpi->h; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ for ( pjd = pid+1 ; pjd < count ; pjd++ ) { @@ -259,7 +259,7 @@ void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) { } /* Hit or miss? */ - if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { + if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -328,7 +328,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * struct part *restrict pi, *restrict parts_j = cj->parts; struct cpart *restrict cpj, *restrict cparts_j = cj->cparts; double pix[3]; - float dx[3], hi, hi2, r2, di, dxj; + float dx[3], hi, hig2, r2, di, dxj; struct entry *sort_j; #ifdef VECTORIZE int icount = 0; @@ -376,8 +376,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( k = 0 ; k < 3 ; k++ ) pix[k] = pi->x[k] - shift[k]; hi = pi->h; - hi2 = hi * hi; - di = hi + dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; + hig2 = hi * hi * kernel_gamma2; + di = hi*kernel_gamma + dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) { @@ -393,7 +393,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -439,8 +439,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( k = 0 ; k < 3 ; k++ ) pix[k] = pi->x[k] - shift[k]; hi = pi->h; - hi2 = hi * hi; - di = -hi - dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; + hig2 = hi * hi * kernel_gamma2; + di = -hi*kernel_gamma - dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ]; /* Loop over the parts in cj. */ for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) { @@ -456,7 +456,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -526,7 +526,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * struct part *restrict pi; struct cpart *restrict cpj, *restrict cparts = ci->cparts; double pix[3]; - float dx[3], hi, hi2, r2; + float dx[3], hi, hig2, r2; #ifdef VECTORIZE int icount = 0; float r2q[VEC_SIZE] __attribute__ ((aligned (16))); @@ -550,7 +550,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( k = 0 ; k < 3 ; k++ ) pix[k] = pi->x[k]; hi = pi->h; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_i ; pjd++ ) { @@ -566,7 +566,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * } /* Hit or miss? */ - if ( r2 > 0.0f && r2 < hi2 ) { + if ( r2 > 0.0f && r2 < hig2 ) { #ifndef VECTORIZE @@ -634,7 +634,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct cpart *restrict cpi, *restrict cparts_i; struct cpart *restrict cpj, *restrict cparts_j; double pix[3], pjx[3], di, dj; - float dx[3], hi, hi2, hj, hj2, r2, dx_max; + float dx[3], hi, hig2, hj, hjg2, r2, dx_max; double hi_max, hj_max; double di_max, dj_min; int count_i, count_j; @@ -669,7 +669,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { sort_j = &cj->sort[ sid*(cj->count + 1) ]; /* Get some other useful values. */ - hi_max = ci->h_max - rshift; hj_max = cj->h_max; + hi_max = ci->h_max*kernel_gamma - rshift; hj_max = cj->h_max*kernel_gamma; count_i = ci->count; count_j = cj->count; parts_i = ci->parts; parts_j = cj->parts; cparts_i = ci->cparts; cparts_j = cj->cparts; @@ -690,11 +690,11 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { if ( cpi->dt > dt_step ) continue; hi = cpi->h; - di = sort_i[pid].d + hi + dx_max - rshift; + di = sort_i[pid].d + hi*kernel_gamma + dx_max - rshift; if ( di < dj_min ) continue; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k] - shift[k]; @@ -712,7 +712,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -757,13 +757,13 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { if ( cpj->dt > dt_step ) continue; hj = cpj->h; - dj = sort_j[pjd].d - hj - dx_max - rshift; + dj = sort_j[pjd].d - hj*kernel_gamma - dx_max - rshift; if ( dj > di_max ) continue; for ( k = 0 ; k < 3 ; k++ ) pjx[k] = cpj->x[k] + shift[k]; - hj2 = hj * hj; + hjg2 = hj * hj * kernel_gamma2; /* Loop over the parts in ci. */ for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) { @@ -779,7 +779,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hj2 ) { + if ( r2 < hjg2 ) { #ifndef VECTORIZE @@ -840,7 +840,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { struct cpart *restrict cpi, *restrict cparts_i; struct cpart *restrict cpj, *restrict cparts_j; double pix[3], pjx[3], di, dj; - float dx[3], hi, hi2, hj, hj2, r2, dx_max; + float dx[3], hi, hig2, hj, hjg2, r2, dx_max; double hi_max, hj_max; double di_max, dj_min; int count_i, count_j; @@ -881,7 +881,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { sort_j = &cj->sort[ sid*(cj->count + 1) ]; /* Get some other useful values. */ - hi_max = ci->h_max - rshift; hj_max = cj->h_max; + hi_max = ci->h_max*kernel_gamma - rshift; hj_max = cj->h_max*kernel_gamma; count_i = ci->count; count_j = cj->count; parts_i = ci->parts; parts_j = cj->parts; cparts_i = ci->cparts; cparts_j = cj->cparts; @@ -924,11 +924,11 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { pi = &parts_i[ sort_i[ pid ].i ]; cpi = &cparts_i[ sort_i[ pid ].i ]; hi = cpi->h; - di = sort_i[pid].d + hi + dx_max - rshift; + di = sort_i[pid].d + hi*kernel_gamma + dx_max - rshift; if ( di < dj_min ) continue; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k] - shift[k]; @@ -949,7 +949,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -999,7 +999,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -1074,13 +1074,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { pj = &parts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ]; hj = cpj->h; - dj = sort_j[pjd].d - hj - dx_max - rshift; + dj = sort_j[pjd].d - hj*kernel_gamma - dx_max - rshift; if ( dj > di_max ) continue; for ( k = 0 ; k < 3 ; k++ ) pjx[k] = cpj->x[k] + shift[k]; - hj2 = hj * hj; + hjg2 = hj * hj * kernel_gamma2; /* Is this particle outside the dt? */ if ( cpj->dt > dt_step ) { @@ -1099,7 +1099,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hj2 && r2 > cpi->h*cpi->h ) { + if ( r2 < hjg2 && r2 > cpi->h*cpi->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -1148,7 +1148,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { } /* Hit or miss? */ - if ( r2 < hj2 && r2 > cpi->h*cpi->h ) { + if ( r2 < hjg2 && r2 > cpi->h*cpi->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -1243,7 +1243,7 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) { int k, pid, pjd, count = c->count; double pix[3]; - float dx[3], hi, hi2, r2; + float dx[3], hi, hig2, r2; struct part *restrict parts = c->parts, *restrict pi; struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts; float dt_step = r->e->dt_step; @@ -1274,7 +1274,7 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) { for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k]; hi = cpi->h; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; /* Loop over the other particles .*/ for ( pjd = 0 ; pjd < count ; pjd++ ) { @@ -1294,7 +1294,7 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) { } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 < hig2 ) { #ifndef VECTORIZE @@ -1347,7 +1347,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) { int k, pid, pjd, count = c->count; double pix[3]; - float dx[3], hi, hi2, r2; + float dx[3], hi, hig2, r2; struct part *restrict parts = c->parts, *restrict pi; struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts; float dt_step = r->e->dt_step; @@ -1392,7 +1392,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) { for ( k = 0 ; k < 3 ; k++ ) pix[k] = cpi->x[k]; hi = cpi->h; - hi2 = hi * hi; + hig2 = hi * hi * kernel_gamma2; /* Is the ith particle not active? */ if ( cpi->dt > dt_step ) { @@ -1411,7 +1411,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) { } /* Hit or miss? */ - if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { + if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -1464,7 +1464,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) { } /* Hit or miss? */ - if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { + if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) { #ifndef VECTORIZE @@ -2120,7 +2120,7 @@ void DOSUB2 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) { } -void DOSUB_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *parts , int *ind , int count , struct cell *restrict cj , int sid ) { +void DOSUB_SUBSET ( struct runner *r , struct cell *ci , struct part *parts , int *ind , int count , struct cell *cj , int sid ) { int j, k; double shift[3]; @@ -2178,31 +2178,8 @@ void DOSUB_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *p ci->h2dx_max*2 < h && cj->h2dx_max*2 < h ) { /* Get the type of pair if not specified explicitly. */ - if ( sid < 0 ) { - - /* Get the relative distance between the pairs, wrapping. */ - for ( k = 0 ; k < 3 ; k++ ) { - if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 ) - shift[k] = s->dim[k]; - else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 ) - shift[k] = -s->dim[k]; - else - shift[k] = 0.0; - } - - /* Get the sorting index. */ - for ( sid = 0 , k = 0 ; k < 3 ; k++ ) - sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 ); - - /* Flip? */ - if ( sid < 13 ) { - struct cell *temp = cj; cj = ci; ci = temp; - } - else - sid = 26 - sid; + sid = space_getsid( s , &ci , &cj , shift ); - } - /* Different types of flags. */ switch ( sid ) { diff --git a/src/runner_iact.h b/src/runner_iact.h index a8090d160687935a94bcd8ced8d720053fd85863..ab0887be2674accafc510c814cfd833d09118bf1 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -42,7 +42,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r float r = sqrtf( r2 ), ri = 1.0f / r; float xi, xj; - float h_inv, hg_inv; + float h_inv; float wi, wj, wi_dx, wj_dx; float mi, mj; float dvdr; @@ -66,18 +66,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r for ( k = 0 ; k < 3 ; k++ ) curlvr[k] *= ri; - if ( r2 < hi*hi ) { + if ( r2 < hi*hi*kernel_gamma2 ) { h_inv = 1.0 / hi; - hg_inv = kernel_igamma * h_inv; - xi = r * hg_inv; + xi = r * h_inv; kernel_deval( xi , &wi , &wi_dx ); pi->rho += mj * wi; pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx ); - pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - // pi->icount += 1; + pi->density.wcount += wi; + pi->density.wcount_dh -= xi * wi_dx; pi->density.div_v += mj * dvdr * wi_dx; for ( k = 0 ; k < 3 ; k++ ) @@ -85,18 +83,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r } - if ( r2 < hj*hj ) { + if ( r2 < hj*hj*kernel_gamma2 ) { h_inv = 1.0 / hj; - hg_inv = kernel_igamma * h_inv; - xj = r * hg_inv; + xj = r * h_inv; kernel_deval( xj , &wj , &wj_dx ); pj->rho += mi * wj; pj->rho_dh -= mi * ( 3.0*wj + xj*wj_dx ); - pj->density.wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pj->density.wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - // pj->icount += 1; + pj->density.wcount += wj; + pj->density.wcount_dh -= xj * wj_dx; pj->density.div_v += mi * dvdr * wj_dx; for ( k = 0 ; k < 3 ; k++ ) @@ -115,7 +111,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo vector r, ri, xi, xj, hi, hj, hi_inv, hj_inv, hig_inv, hjg_inv, wi, wj, wi_dx, wj_dx; vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; - vector mi, mj, wscale; + vector mi, mj; vector dx[3], dv[3]; vector vi[3], vj[3]; vector dvdr, div_vi, div_vj; @@ -144,19 +140,15 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] ); #endif - wscale.v = vec_set1( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - hi.v = vec_load( Hi ); hi_inv.v = vec_rcp( hi.v ); hi_inv.v = hi_inv.v - hi_inv.v * ( hi_inv.v * hi.v - vec_set1( 1.0f ) ); - hig_inv.v = hi_inv.v * vec_set1( kernel_igamma ); - xi.v = r.v * hig_inv.v; + xi.v = r.v * hi_inv.v; hj.v = vec_load( Hj ); hj_inv.v = vec_rcp( hj.v ); hj_inv.v = hj_inv.v - hj_inv.v * ( hj_inv.v * hj.v - vec_set1( 1.0f ) ); - hjg_inv.v = hj_inv.v * vec_set1( kernel_igamma ); - xj.v = r.v * hjg_inv.v; + xj.v = r.v * hj_inv.v; kernel_deval_vec( &xi , &wi , &wi_dx ); kernel_deval_vec( &xj , &wj , &wj_dx ); @@ -179,16 +171,16 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo rhoi.v = mj.v * wi.v; rhoi_dh.v = mj.v * ( vec_set1( 3.0f ) * wi.v + xi.v * wi_dx.v ); - wcounti.v = wi.v * wscale.v; - wcounti_dh.v = xi.v * hi_inv.v * wi_dx.v * wscale.v; + wcounti.v = wi.v; + wcounti_dh.v = xi.v * wi_dx.v; div_vi.v = mj.v * dvdr.v * wi_dx.v; for ( k = 0 ; k < 3 ; k++ ) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; rhoj.v = mi.v * wj.v; rhoj_dh.v = mi.v * ( vec_set1( 3.0f ) * wj.v + xj.v * wj_dx.v ); - wcountj.v = wj.v * wscale.v; - wcountj_dh.v = xj.v * hj_inv.v * wj_dx.v * wscale.v; + wcountj.v = wj.v; + wcountj_dh.v = xj.v * wj_dx.v; div_vj.v = mi.v * dvdr.v * wj_dx.v; for ( k = 0 ; k < 3 ; k++ ) curl_vj[k].v = mi.v * curlvr[k].v * wj_dx.v; @@ -230,14 +222,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( float r, ri; float xi; - float h_inv, hg_inv; + float h_inv; float wi, wi_dx; float mj; float dvdr; float dv[3], curlvr[3]; int k; - if ( r2 < hi*hi ) { + if ( r2 < hi*hi*kernel_gamma2 ) { /* Get the masses. */ mj = pj->mass; @@ -261,15 +253,13 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( curlvr[k] *= ri; h_inv = 1.0 / hi; - hg_inv = kernel_igamma * h_inv; - xi = r * hg_inv; + xi = r * h_inv; kernel_deval( xi , &wi , &wi_dx ); pi->rho += mj * wi; pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx ); - pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - // pi->icount += 1; + pi->density.wcount += wi; + pi->density.wcount_dh -= xi * wi_dx; pi->density.div_v += mj * dvdr * wi_dx; for ( k = 0 ; k < 3 ; k++ ) @@ -289,7 +279,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit vector r, ri, xi, hi, hi_inv, hig_inv, wi, wi_dx; vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; - vector mj, wscale; + vector mj; vector dx[3], dv[3]; vector vi[3], vj[3]; vector dvdr; @@ -315,13 +305,11 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit for ( k = 0 ; k < 3 ; k++ ) dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] ); #endif - wscale.v = vec_set1( 4.0f * M_PI / 3.0f * kernel_igamma3 ); hi.v = vec_load( Hi ); hi_inv.v = vec_rcp( hi.v ); hi_inv.v = hi_inv.v - hi_inv.v * ( hi_inv.v * hi.v - vec_set1( 1.0f ) ); - hig_inv.v = hi_inv.v * vec_set1( kernel_igamma ); - xi.v = r.v * hig_inv.v; + xi.v = r.v * hi_inv.v; kernel_deval_vec( &xi , &wi , &wi_dx ); @@ -343,8 +331,8 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit rhoi.v = mj.v * wi.v; rhoi_dh.v = mj.v * ( vec_set1( 3.0f ) * wi.v + xi.v * wi_dx.v ); - wcounti.v = wi.v * wscale.v; - wcounti_dh.v = xi.v * hi_inv.v * wi_dx.v * wscale.v; + wcounti.v = wi.v; + wcounti_dh.v = xi.v * wi_dx.v; div_vi.v = mj.v * dvdr.v * wi_dx.v; for ( k = 0 ; k < 3 ; k++ ) curl_vi[k].v = mj.v * curlvr[k].v * wi_dx.v; @@ -392,14 +380,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 POrho2j = pj->force.POrho2; /* Get the kernel for hi. */ - hi_inv = kernel_igamma / hi; + hi_inv = 1.0f / hi; hi2_inv = hi_inv * hi_inv; xi = r * hi_inv; kernel_deval( xi , &wi , &wi_dx ); wi_dr = hi2_inv * hi2_inv * wi_dx; /* Get the kernel for hj. */ - hj_inv = kernel_igamma / hj; + hj_inv = 1.0f / hj; hj2_inv = hj_inv * hj_inv; xj = r * hj_inv; kernel_deval( xj , &wj , &wj_dx ); @@ -522,7 +510,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float r.v = r2.v * ri.v; /* Get the kernel for hi. */ - hi.v = vec_set1( kernel_gamma ) * vec_load( Hi ); + hi.v = vec_load( Hi ); hi_inv.v = vec_rcp( hi.v ); hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) ); hi2_inv.v = hi_inv.v * hi_inv.v; @@ -531,7 +519,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; /* Get the kernel for hj. */ - hj.v = vec_set1( kernel_gamma ) * vec_load( Hj ); + hj.v = vec_load( Hj ); hj_inv.v = vec_rcp( hj.v ); hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) ); hj2_inv.v = hj_inv.v * hj_inv.v; @@ -622,14 +610,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl POrho2j = pj->force.POrho2; /* Get the kernel for hi. */ - hi_inv = kernel_igamma / hi; + hi_inv = 1.0f / hi; hi2_inv = hi_inv * hi_inv; xi = r * hi_inv; kernel_deval( xi , &wi , &wi_dx ); wi_dr = hi2_inv * hi2_inv * wi_dx; /* Get the kernel for hj. */ - hj_inv = kernel_igamma / hj; + hj_inv = 1.0f / hj; hj2_inv = hj_inv * hj_inv; xj = r * hj_inv; kernel_deval( xj , &wj , &wj_dx ); @@ -747,7 +735,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force r.v = r2.v * ri.v; /* Get the kernel for hi. */ - hi.v = vec_set1( kernel_gamma ) * vec_load( Hi ); + hi.v = vec_load( Hi ); hi_inv.v = vec_rcp( hi.v ); hi_inv.v = hi_inv.v - hi_inv.v * ( hi.v * hi_inv.v - vec_set1( 1.0f ) ); hi2_inv.v = hi_inv.v * hi_inv.v; @@ -756,7 +744,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force wi_dr.v = hi2_inv.v * hi2_inv.v * wi_dx.v; /* Get the kernel for hj. */ - hj.v = vec_set1( kernel_gamma ) * vec_load( Hj ); + hj.v = vec_load( Hj ); hj_inv.v = vec_rcp( hj.v ); hj_inv.v = hj_inv.v - hj_inv.v * ( hj.v * hj_inv.v - vec_set1( 1.0f ) ); hj2_inv.v = hj_inv.v * hj_inv.v; diff --git a/src/space.c b/src/space.c index 3bbdcb993ff998d79ba86dadb6b47795aa716695..a5f9c211210ce218cb4253d705d30c50749116e7 100644 --- a/src/space.c +++ b/src/space.c @@ -34,6 +34,7 @@ #include "atomic.h" #include "lock.h" #include "task.h" +#include "kernel.h" #include "part.h" #include "cell.h" #include "space.h" @@ -320,123 +321,6 @@ void space_rebuild_recycle ( struct space *s , struct cell *c ) { } -/** - * @brief Recursively rebuild a cell tree. - * - */ - -int space_rebuild_recurse ( struct space *s , struct cell *c ) { - - int k, count, changes = 0, wasmt[8]; - float h, h_limit, h_max = 0.0f, dt_min = c->parts[0].dt, dt_max = dt_min; - struct cell *temp; - - /* Clean out the task pointers. */ - c->sorts = NULL; - c->nr_tasks = 0; - c->nr_density = 0; - c->dx_max = 0.0; - - /* If the cell is already split, check that the split is still ok. */ - if ( c->split ) { - - /* Check the depth. */ - if ( c->depth > s->maxdepth ) - s->maxdepth = c->depth; - - /* Set the minimum cutoff. */ - h_limit = fmin( c->h[0] , fmin( c->h[1] , c->h[2] ) ) / 2; - - /* Count the particles below that. */ - for ( count = 0 , k = 0 ; k < c->count ; k++ ) { - h = c->parts[k].h; - if ( h <= h_limit ) - count += 1; - if ( h > h_max ) - h_max = h; - if ( c->parts[k].dt < dt_min ) - dt_min = c->parts[k].dt; - if ( c->parts[k].dt > dt_max ) - dt_max = c->parts[k].dt; - } - c->h_max = h_max; - c->dt_min = dt_min; - c->dt_max = dt_max; - c->dx_max = 0; - - /* Un-split? */ - if ( count < c->count*space_splitratio || c->count < space_splitsize ) { - - /* Get rid of the progeny. */ - space_rebuild_recycle( s , c ); - - /* Re-set the split flag. */ - c->split = 0; - - } - - /* Otherwise, recurse on the kids. */ - else { - - /* Populate all progeny. */ - for ( k = 0 ; k < 8 ; k++ ) - if ( ( wasmt[k] = ( c->progeny[k] == NULL ) ) ) { - temp = space_getcell( s ); - temp->count = 0; - temp->loc[0] = c->loc[0]; - temp->loc[1] = c->loc[1]; - temp->loc[2] = c->loc[2]; - temp->h[0] = c->h[0]/2; - temp->h[1] = c->h[1]/2; - temp->h[2] = c->h[2]/2; - temp->dmin = c->dmin/2; - if ( k & 4 ) - temp->loc[0] += temp->h[0]; - if ( k & 2 ) - temp->loc[1] += temp->h[1]; - if ( k & 1 ) - temp->loc[2] += temp->h[2]; - temp->depth = c->depth + 1; - temp->split = 0; - temp->h_max = 0.0; - temp->dx_max = 0.0; - temp->parent = c; - c->progeny[k] = temp; - } - - /* Make sure each part is in its place. */ - cell_split( c ); - - /* Remove empty progeny. */ - for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k]->count == 0 ) { - changes += !wasmt[k]; - space_recycle( s , c->progeny[k] ); - c->progeny[k] = NULL; - } - else - changes += wasmt[k]; - - /* Recurse. */ - for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) - changes += space_rebuild_recurse( s , c->progeny[k] ); - - } - - } - - /* Otherwise, try to split it anyway. */ - else { - space_split( s , c ); - changes += c->split; - } - - /* Return the grand total. */ - return changes; - - } - /** * @brief Re-build the cells as well as the tasks. * @@ -447,7 +331,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) { void space_rebuild ( struct space *s , double cell_max ) { - float h_max = s->cell_min, dmin; + float h_max = s->cell_min / kernel_gamma, dmin; int i, j, k, cdim[3], nr_parts = s->nr_parts; struct cell *restrict c; struct part *restrict finger, *restrict p, *parts = s->parts; @@ -473,12 +357,12 @@ void space_rebuild ( struct space *s , double cell_max ) { } s->h_max = h_max; } - // printf( "space_rebuild: h_max is %.3e.\n" , h_max ); + // printf( "space_rebuild: h_max is %.3e (cell_max=%.3e).\n" , h_max , cell_max ); // printf( "space_rebuild: getting h_min and h_max took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); /* Get the new putative cell dimensions. */ for ( k = 0 ; k < 3 ; k++ ) - cdim[k] = floor( s->dim[k] / fmax( h_max*space_stretch , cell_max ) ); + cdim[k] = floor( s->dim[k] / fmax( h_max*kernel_gamma*space_stretch , cell_max ) ); /* Do we need to re-build the upper-level cells? */ // tic = getticks(); @@ -1014,7 +898,8 @@ void space_splittasks ( struct space *s ) { /* Should this task be split-up? */ if ( ci->split && cj->split && - ci->h_max*space_stretch < hi/2 && cj->h_max*space_stretch < hj/2 ) { + ci->h_max*kernel_gamma*space_stretch < hi/2 && + cj->h_max*kernel_gamma*space_stretch < hj/2 ) { /* Replace by a single sub-task? */ if ( space_dosub && @@ -1421,6 +1306,7 @@ void space_split ( struct space *s , struct cell *c ) { temp->split = 0; temp->h_max = 0.0; temp->dx_max = 0.0; + temp->h2dx_max = 0.0; temp->parent = c; c->progeny[k] = temp; } @@ -1428,10 +1314,6 @@ void space_split ( struct space *s , struct cell *c ) { /* Split the cell data. */ cell_split( c ); - /* Recurse? */ - // for ( k = 0 ; k < 8 ; k++ ) - // space_split( s , c->progeny[k] ); - /* Remove any progeny with zero parts. */ for ( k = 0 ; k < 8 ; k++ ) if ( c->progeny[k]->count == 0 ) { diff --git a/src/space.h b/src/space.h index d5cb42f14dff889d9c3f885461099bb6014f4ff8..51ddbd5c01e945176679529f11e7bc9729e5f0c6 100644 --- a/src/space.h +++ b/src/space.h @@ -26,7 +26,7 @@ #define space_splitratio 0.875 #define space_splitsize_default 400 #define space_subsize_default 5000 -#define space_dosub 1 +#define space_dosub 0 #define space_stretch 1.05 #define space_maxtaskspercell 31