diff --git a/src/Makefile.am b/src/Makefile.am index 33907eb666c96b038a9cd7a1e89abfc7df847167..21a6369eab003cd82dd47c4d5c200a341d34c6a6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,7 +19,9 @@ AUTOMAKE_OPTIONS=gnu # Add the debug flag to the whole thing -AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 +AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ + -funroll-loops $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) \ + -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 -DHIST # Assign a "safe" version number AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 diff --git a/src/cell.h b/src/cell.h index 920ae422e5eb5fca6baa4c78d90e5559ef34b54a..8d54374f9b7a9de711e8dd0f2f04c9ae92d93ee2 100644 --- a/src/cell.h +++ b/src/cell.h @@ -17,7 +17,8 @@ * ******************************************************************************/ - +/* Some constants. */ +#define cell_sid_dt 13 /* The queue timers. */ enum { diff --git a/src/gadgetsmp.h b/src/gadgetsmp.h index 67a01137a8b0f61b7bdfad57465a26e1890eaba5..3ead9719458a06bd1069bbdf37f134f7c391c461 100644 --- a/src/gadgetsmp.h +++ b/src/gadgetsmp.h @@ -29,7 +29,7 @@ #include "cell.h" #include "space.h" #include "queue.h" +#include "runner.h" #include "runner_iact.h" #include "engine.h" -#include "runner.h" #include "ic.h" diff --git a/src/ic.c b/src/ic.c index 11025277e2443d0cfdf0d289df098b45f0f55b2b..c9c7d8559fbe50340ad0d78af9c6bbca81e8cbc1 100644 --- a/src/ic.c +++ b/src/ic.c @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of GadgetSMP. - * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Matthieu Schaller (matthieu.schaller@durham.ac.uk). * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -292,7 +293,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int* readArray(h_grp, "Mass", FLOAT, *N, 1, *parts, mass); readArray(h_grp, "SmoothingLength", FLOAT, *N, 1, *parts, h); readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, u); - readArray(h_grp, "ParticleIDs", ULONG, *N, 1, *parts, id); + readArray(h_grp, "ParticleIDs", ULONGLONG, *N, 1, *parts, id); /* Close particle group */ H5Gclose(h_grp); diff --git a/src/ic.h b/src/ic.h index 10216bfb8c410695b6aadc14792e0f072be8d5a5..bcacfddad0ca72fa8951c30203d6cff46ead5f48 100644 --- a/src/ic.h +++ b/src/ic.h @@ -1,6 +1,6 @@ /******************************************************************************* * This file is part of GadgetSMP. - * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * Coypright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk). * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published diff --git a/src/part.h b/src/part.h index 79324ba0f92477fa731d8ba0dbda2845a0dcb23a..095e85c2e198a1fcaf3938384053cbee80321b46 100644 --- a/src/part.h +++ b/src/part.h @@ -31,13 +31,13 @@ struct part { float h; /* Particle time-step. */ - int dt; + float dt; /* Particle mass. */ float mass; /* Particle ID. */ - unsigned long id; + unsigned long long id; /* Particle position. */ double x[3]; diff --git a/src/runner.c b/src/runner.c index 672c8786bc8d2899e585461e43fa96af88d38f07..7c87642f1da8aa4e98dbcc55f1007590bcfea5f6 100644 --- a/src/runner.c +++ b/src/runner.c @@ -161,64 +161,6 @@ void runner_dosort_ascending ( struct entry *sort , int N ) { } -/** - * @brief inline helper fuction to merge two entry arrays (forward). - * - * @param one the first array - * @param none the length of the first array - * @param two the second array - * @param ntwo the length of the second array - * @param dest the destination array. - */ - -inline void merge_forward ( struct entry *__restrict__ one , int none , struct entry *__restrict__ two , int ntwo , struct entry *__restrict__ dest ) { - - int i = 0, j = 0, k = 0; - - while ( j < none && k < ntwo ) - if ( one[j].d < two[k].d ) - dest[i++] = one[j++]; - else - dest[i++] = two[k++]; - if ( j == none ) - for ( ; k < ntwo ; k++ ) - dest[i++] = two[k]; - else - for ( ; j < none ; j++ ) - dest[i++] = one[j]; - - } - - -/** - * @brief inline helper fuction to merge two entry arrays (forward). - * - * @param one the first array - * @param none the length of the first array - * @param two the second array - * @param ntwo the length of the second array - * @param dest the destination array. - */ - -inline void merge_backward ( struct entry *__restrict__ one , int none , struct entry *__restrict__ two , int ntwo , struct entry *__restrict__ dest ) { - - int i = none + ntwo - 1, j = none - 1, k = ntwo - 1; - - while ( j >= 0 && k >= 0 ) - if ( one[j].d > two[k].d ) - dest[i--] = one[j--]; - else - dest[i--] = two[k--]; - if ( j < 0 ) - for ( ; k >= 0 ; k-- ) - dest[i--] = two[k]; - else - for ( ; j >= 0 ; j-- ) - dest[i--] = one[j]; - - } - - /** * @brief Sort the particles in the given cell along all cardinal directions. * @@ -232,7 +174,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { struct entry *fingers[8]; struct part *parts = c->parts; int j, k, count = c->count; - int cone, ctwo; int i, ind, off[8], inds[8], temp_i; // float shift[3]; float buff[8], px[3]; @@ -248,7 +189,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { if ( lock_lock( &c->lock ) != 0 ) error( "Failed to lock cell." ); if ( c->sort == NULL ) - if ( ( c->sort = (struct entry *)malloc( sizeof(struct entry) * (c->count + 1) * 13 ) ) == NULL ) + if ( ( c->sort = (struct entry *)malloc( sizeof(struct entry) * (c->count + 1) * 14 ) ) == NULL ) error( "Failed to allocate sort memory." ); if ( lock_unlock( &c->lock ) != 0 ) error( "Failed to unlock cell." ); @@ -256,90 +197,57 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { /* Does this cell have any progeny? */ if ( c->split ) { - /* Loop over the 13 different sort arrays. */ - for ( j = 0 ; j < 13 ; j++ ) { + /* Loop over the 13+1 different sort arrays. */ + for ( j = 0 ; j < 14 ; j++ ) { /* Has this sort array been flagged? */ if ( !( flags & (1 << j) ) ) continue; - if ( 0 ) { - - /* Get a finger on the sorting array. */ - finger = &c->sort[ j*(count + 1) ]; - - /* Merge the two first sub-cells forward into this cell. */ - cone = c->progeny[0]->count; - ctwo = c->progeny[1]->count; - merge_forward( &c->progeny[0]->sort[ j*(cone + 1) ] , cone , - &c->progeny[1]->sort[ j*(ctwo + 1) ] , ctwo , - finger ); - - /* Merge-in the remaining arrays, alternating forward and - backward merges. */ - for ( k = 2 ; k < 8 ; k++ ) { - cone = cone + ctwo; - ctwo = c->progeny[k]->count; - if ( k & 1 ) - merge_forward( &finger[ count - cone ] , cone , - &c->progeny[k]->sort[ j*(ctwo + 1) ] , ctwo , - finger ); - else - merge_backward( finger , cone , - &c->progeny[k]->sort[ j*(ctwo + 1) ] , ctwo , - &finger[ count - cone - ctwo ] ); + /* Init the particle index offsets. */ + for ( off[0] = 0 , k = 1 ; k < 8 ; k++ ) + if ( c->progeny[k-1] != NULL ) + off[k] = off[k-1] + c->progeny[k-1]->count; + else + off[k] = off[k-1]; + + /* Init the entries and indices. */ + for ( k = 0 ; k < 8 ; k++ ) { + inds[k] = k; + if ( c->progeny[k] != NULL && c->progeny[k]->count > 0 ) { + fingers[k] = &c->progeny[k]->sort[ j*(c->progeny[k]->count + 1) ]; + buff[k] = fingers[k]->d; + off[k] = off[k]; } - + else + buff[k] = FLT_MAX; } - - else { - - /* Init the particle index offsets. */ - for ( off[0] = 0 , k = 1 ; k < 8 ; k++ ) - if ( c->progeny[k-1] != NULL ) - off[k] = off[k-1] + c->progeny[k-1]->count; - else - off[k] = off[k-1]; - - /* Init the entries and indices. */ - for ( k = 0 ; k < 8 ; k++ ) { - inds[k] = k; - if ( c->progeny[k] != NULL && c->progeny[k]->count > 0 ) { - fingers[k] = &c->progeny[k]->sort[ j*(c->progeny[k]->count + 1) ]; - buff[k] = fingers[k]->d; - off[k] = off[k]; - } - else - buff[k] = FLT_MAX; - } - /* Sort the buffer. */ - for ( i = 0 ; i < 7 ; i++ ) - for ( k = i+1 ; k < 8 ; k++ ) - if ( buff[ inds[k] ] < buff[ inds[i] ] ) { - temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i; - } + /* Sort the buffer. */ + for ( i = 0 ; i < 7 ; i++ ) + for ( k = i+1 ; k < 8 ; k++ ) + if ( buff[ inds[k] ] < buff[ inds[i] ] ) { + temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i; + } - /* For each entry in the new sort list. */ - finger = &c->sort[ j*(count + 1) ]; - for ( ind = 0 ; ind < count ; ind++ ) { + /* For each entry in the new sort list. */ + finger = &c->sort[ j*(count + 1) ]; + for ( ind = 0 ; ind < count ; ind++ ) { - /* Copy the minimum into the new sort array. */ - finger[ind].d = buff[inds[0]]; - finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; + /* Copy the minimum into the new sort array. */ + finger[ind].d = buff[inds[0]]; + finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; - /* Update the buffer. */ - fingers[inds[0]] += 1; - buff[inds[0]] = fingers[inds[0]]->d; + /* Update the buffer. */ + fingers[inds[0]] += 1; + buff[inds[0]] = fingers[inds[0]]->d; - /* Find the smallest entry. */ - for ( k = 1 ; k < 8 && buff[inds[k]] < buff[inds[k-1]] ; k++ ) { - temp_i = inds[k-1]; inds[k-1] = inds[k]; inds[k] = temp_i; - } + /* Find the smallest entry. */ + for ( k = 1 ; k < 8 && buff[inds[k]] < buff[inds[k-1]] ; k++ ) { + temp_i = inds[k-1]; inds[k-1] = inds[k]; inds[k] = temp_i; + } - } /* Merge. */ - - } + } /* Merge. */ /* Add a sentinel. */ c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX; @@ -349,39 +257,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { } /* progeny? */ - /* Otherwise, just sort. */ - // else { - // - // /* Loop over the different cell axes. */ - // for ( j = 0 ; j < 13 ; j++ ) { - // - // /* Has this sort array been flagged? */ - // if ( !( flags & (1 << j) ) ) - // continue; - // - // /* Get the shift vector. */ - // shift[0] = runner_shift[ 3*j + 0 ]; - // shift[1] = runner_shift[ 3*j + 1 ]; - // shift[2] = runner_shift[ 3*j + 2 ]; - // - // /* Fill the sort array. */ - // finger = &c->sort[ j*(count + 1) ]; - // for ( k = 0 ; k < count ; k++ ) { - // finger[k].i = k; - // finger[k].d = parts[k].x[0]*shift[0] + parts[k].x[1]*shift[1] + parts[k].x[2]*shift[2]; - // } - // - // /* Add the sentinel. */ - // finger[ c->count ].d = FLT_MAX; - // finger[ c->count ].i = 0; - // - // /* Sort descending. */ - // runner_dosort_ascending( finger , c->count ); - // - // } - // - // } - /* Otherwise, just sort. */ else { @@ -395,10 +270,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { c->sort[ j*(count + 1) + k].i = k; c->sort[ j*(count + 1) + k].d = px[0]*runner_shift[ 3*j + 0 ] + px[1]*runner_shift[ 3*j + 1 ] + px[2]*runner_shift[ 3*j + 2 ]; } + if ( flags & (1 << 14) ) { + c->sort[ 14*(count + 1) + k ].i = k; + c->sort[ 14*(count + 1) + k ].d = parts[k].dt; + } } /* Add the sentinel and sort. */ - for ( j = 0 ; j < 13 ; j++ ) + for ( j = 0 ; j < 14 ; j++ ) if ( flags & (1 << j) ) { c->sort[ j*(count + 1) + c->count ].d = FLT_MAX; c->sort[ j*(count + 1) + c->count ].i = 0; @@ -505,7 +384,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) { p->h_dt = 0.0f; /* Compute this particle's time step. */ - p->dt = const_cfl * p->h / ( const_gamma * ( const_gamma - 1.0f ) * p->u ); + p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u ); /* Compute the pressure. */ // p->P = p->rho * p->u * ( const_gamma - 1.0f ); diff --git a/src/runner.h b/src/runner.h index 548165edf8a8913065c4629e1de30de04ff128eb..4b21648853fe9af962c4fde325abccf260c33690 100644 --- a/src/runner.h +++ b/src/runner.h @@ -88,8 +88,8 @@ extern int runner_counter[ runner_counter_count ]; /* Histogram functions. */ #define runner_hist_a 1.0 -#define runner_hist_b 10.0 -#define runner_hist_N 99 +#define runner_hist_b 1000.0 +#define runner_hist_N 100 long long int runner_hist_bins[ runner_hist_N ]; #define runner_hist_hit( x ) __sync_add_and_fetch( &runner_hist_bins[ (int)fmax( 0.0 , fmin( runner_hist_N-1 , ((x) - runner_hist_a) / (runner_hist_b - runner_hist_a) * runner_hist_N ) ) ] , 1 ) diff --git a/src/space.c b/src/space.c index 52c582057086715cec098bf237e94d48008519a2..b43203c97dbe05728b076c34f9e06a00f1fab207 100644 --- a/src/space.c +++ b/src/space.c @@ -76,6 +76,60 @@ const int sortlistID[27] = { }; +/** + * @brief Sort the particles according to the given indices. + * + * @param parts The list of #part + * @param ind The indices with respect to which the parts are sorted. + * @param N The number of parts + * @param min Lowest index. + * @param max highest index. + * + * This function calls itself recursively. + */ + +void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) { + + int pivot = (min + max) / 2; + int i = 0, j = N-1; + int temp_i; + struct part temp_p; + + /* One pass of quicksort. */ + while ( i < j ) { + while ( i < N && ind[i] <= pivot ) + i++; + while ( j >= 0 && ind[j] > pivot ) + j--; + if ( i < j ) { + temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i; + temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p; + } + } + + /* Verify sort. */ + for ( int k = 0 ; k <= j ; k++ ) + if ( ind[k] > pivot ) { + printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); + error( "Sorting failed (<=pivot)." ); + } + for ( int k = j+1 ; k < N ; k++ ) + if ( ind[k] <= pivot ) { + printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); + error( "Sorting failed (>pivot)." ); + } + + /* Recurse on the left? */ + if ( j > 0 && pivot > min ) + parts_sort( parts , ind , j+1 , min , pivot ); + + /* Recurse on the right? */ + if ( i < N && pivot+1 < max ) + parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + + } + + /** * @brief Mapping function to free the sorted indices buffers. */ @@ -361,7 +415,7 @@ void space_splittasks ( struct space *s ) { hj = fmax( cj->h[0] , fmax( cj->h[1] , cj->h[2] ) ); /* Should this task be split-up? */ - if ( ci->split && cj->split && ci->h_max < hi/2 && cj->h_max < hj/2 ) { + if ( ci->split && cj->split && ci->h_max*space_stretch < hi/2 && cj->h_max*space_stretch < hj/2 ) { /* Get the relative distance between the pairs, wrapping. */ for ( k = 0 ; k < 3 ; k++ ) { @@ -816,7 +870,7 @@ void space_maketasks ( struct space *s , int do_sort ) { void maketasks_rec ( struct cell *c , struct task *sort_up[] , int nr_sort_up , struct cell *parent ) { int j, k, nr_sort = 0; - struct task *sort[14], *t; + struct task *sort[7], *t; /* Clear the waits on this cell. */ c->wait = 0; @@ -827,38 +881,29 @@ void space_maketasks ( struct space *s , int do_sort ) { if ( do_sort ) { if ( c->count < 1000 ) { - sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x3fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); for ( k = 0 ; k < 13 ; k++ ) c->sorts[k] = sort[0]; nr_sort = 1; } else if ( c->count < 5000 ) { - sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0xf , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0xf0 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x1f00 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - for ( k = 0 ; k < 4 ; k++ ) + sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x7f , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x3f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + for ( k = 0 ; k < 7 ; k++ ) c->sorts[k] = sort[0]; - for ( k = 4 ; k < 8 ; k++ ) + for ( k = 7 ; k < 14 ; k++ ) c->sorts[k] = sort[1]; - for ( k = 8 ; k < 13 ; k++ ) - c->sorts[k] = sort[2]; - nr_sort = 3; + nr_sort = 2; } else { - c->sorts[0] = sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[1] = sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x2 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[2] = sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x4 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[3] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x8 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[4] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x10 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[5] = sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x20 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[6] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[7] = sort[7] = space_addtask( s , task_type_sort , task_subtype_none , 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[8] = sort[8] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[9] = sort[9] = space_addtask( s , task_type_sort , task_subtype_none , 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[10] = sort[10] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[11] = sort[11] = space_addtask( s , task_type_sort , task_subtype_none , 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[12] = sort[12] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - nr_sort = 13; + c->sorts[0] = c->sorts[1] = sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1 + 0x2 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[2] = c->sorts[3] = sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x4 + 0x8 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[4] = c->sorts[5] = sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x10 + 0x20 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[6] = c->sorts[7] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 + 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[8] = c->sorts[9] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 + 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[10] = c->sorts[11] = sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 + 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 + 0x2000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + nr_sort = 7; } } @@ -1242,7 +1287,8 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int nr_cells, cdim[3]; double h_min, h_max, h[3], ih[3]; struct cell *c, *cells; - struct part *parts_new, *finger; + struct part *finger; + int *ind; /* Get the minimum and maximum cutoff radii. */ @@ -1253,6 +1299,9 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , else if ( parts[k].h > h_max ) h_max = parts[k].h; + /* Stretch the maximum smoothing length. */ + h_max *= space_stretch; + /* Get the cell width. */ if ( h_cells < h_max ) h_cells = h_max; @@ -1280,31 +1329,29 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , c->h[0] = h[0]; c->h[1] = h[1]; c->h[2] = h[2]; } - /* Run through the particles and get the counts for each cell. */ - for ( k = 0 ; k < N ; k++ ) - cells[ cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ].count += 1; + /* Run through the particles and get their cell index. */ + ind = (int *)alloca( sizeof(int) * N ); + for ( k = 0 ; k < N ; k++ ) { + ind[k] = cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ); + cells[ ind[k] ].count += 1; + } - /* Allocate the new part buffer and set the part pointers in each cell. */ - if ( posix_memalign( (void *)&parts_new , 64 , N * sizeof(struct part) ) != 0 ) - error( "Failed to allocate parts." ); - for ( finger = parts_new , k = 0 ; k < nr_cells ; k++ ) { + /* Sort the parts according to their cells. */ + parts_sort( parts , ind , N , 0 , nr_cells ); + + /* Hook the cells up to the parts. */ + for ( finger = parts , k = 0 ; k < nr_cells ; k++ ) { c = &cells[ k ]; c->parts = finger; finger = &finger[ c->count ]; - c->count = 0; - } - for ( k = 0 ; k < N ; k++ ) { - c = &cells[ cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ]; - c->parts[ c->count ] = parts[k]; - c->count += 1; } /* Store eveything in the space. */ s->h_min = h_min; s->h_max = h_max; s->dim[0] = dim[0]; s->dim[1] = dim[1]; s->dim[2] = dim[2]; s->periodic = periodic; - s->parts = parts_new; s->nr_parts = N; + s->parts = parts; s->h[0] = h[0]; s->h[1] = h[1]; s->h[2] = h[2]; s->ih[0] = ih[0]; s->ih[1] = ih[1]; s->ih[2] = ih[2]; s->cdim[0] = cdim[0]; s->cdim[1] = cdim[1]; s->cdim[2] = cdim[2]; diff --git a/src/space.h b/src/space.h index e358744474a9650b07abbc6ef7ca5be0b873dd40..32219c8a4ff89d1d7591e02f905a46a29c7a63b7 100644 --- a/src/space.h +++ b/src/space.h @@ -26,6 +26,7 @@ #define space_splitratio 0.875 #define space_splitsize_default 400 #define space_dosub 1 +#define space_stretch 1.0 /* Convert cell location to ID. */ @@ -57,6 +58,9 @@ struct space { /* The minimum and maximum cutoff radii. */ double h_min, h_max; + /* Current time step for particles. */ + float dt; + /* Number of cells. */ int nr_cells, tot_cells;