Skip to content
Snippets Groups Projects
Commit c4fde7a7 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

added lowest-level sorting of particles, minor fixes to the ic-reader, and...

added lowest-level sorting of particles, minor fixes to the ic-reader, and extension of the copyright statement.


Former-commit-id: 8f1de71456a8c10e009da2d48cbc7683106418c4
parent 79e36b80
Branches
Tags
No related merge requests found
...@@ -19,7 +19,9 @@ ...@@ -19,7 +19,9 @@
AUTOMAKE_OPTIONS=gnu AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing # Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-funroll-loops $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.67e9 -DHIST
# Assign a "safe" version number # Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
......
...@@ -17,7 +17,8 @@ ...@@ -17,7 +17,8 @@
* *
******************************************************************************/ ******************************************************************************/
/* Some constants. */
#define cell_sid_dt 13
/* The queue timers. */ /* The queue timers. */
enum { enum {
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include "cell.h" #include "cell.h"
#include "space.h" #include "space.h"
#include "queue.h" #include "queue.h"
#include "runner.h"
#include "runner_iact.h" #include "runner_iact.h"
#include "engine.h" #include "engine.h"
#include "runner.h"
#include "ic.h" #include "ic.h"
/******************************************************************************* /*******************************************************************************
* This file is part of GadgetSMP. * This file is part of GadgetSMP.
* Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Matthieu Schaller (matthieu.schaller@durham.ac.uk).
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published * it under the terms of the GNU Lesser General Public License as published
...@@ -292,7 +293,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int* ...@@ -292,7 +293,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
readArray(h_grp, "Mass", FLOAT, *N, 1, *parts, mass); readArray(h_grp, "Mass", FLOAT, *N, 1, *parts, mass);
readArray(h_grp, "SmoothingLength", FLOAT, *N, 1, *parts, h); readArray(h_grp, "SmoothingLength", FLOAT, *N, 1, *parts, h);
readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, u); readArray(h_grp, "InternalEnergy", FLOAT, *N, 1, *parts, u);
readArray(h_grp, "ParticleIDs", ULONG, *N, 1, *parts, id); readArray(h_grp, "ParticleIDs", ULONGLONG, *N, 1, *parts, id);
/* Close particle group */ /* Close particle group */
H5Gclose(h_grp); H5Gclose(h_grp);
......
/******************************************************************************* /*******************************************************************************
* This file is part of GadgetSMP. * This file is part of GadgetSMP.
* Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * Coypright (c) 2012 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published * it under the terms of the GNU Lesser General Public License as published
......
...@@ -31,13 +31,13 @@ struct part { ...@@ -31,13 +31,13 @@ struct part {
float h; float h;
/* Particle time-step. */ /* Particle time-step. */
int dt; float dt;
/* Particle mass. */ /* Particle mass. */
float mass; float mass;
/* Particle ID. */ /* Particle ID. */
unsigned long id; unsigned long long id;
/* Particle position. */ /* Particle position. */
double x[3]; double x[3];
......
...@@ -161,64 +161,6 @@ void runner_dosort_ascending ( struct entry *sort , int N ) { ...@@ -161,64 +161,6 @@ void runner_dosort_ascending ( struct entry *sort , int N ) {
} }
/**
* @brief inline helper fuction to merge two entry arrays (forward).
*
* @param one the first array
* @param none the length of the first array
* @param two the second array
* @param ntwo the length of the second array
* @param dest the destination array.
*/
inline void merge_forward ( struct entry *__restrict__ one , int none , struct entry *__restrict__ two , int ntwo , struct entry *__restrict__ dest ) {
int i = 0, j = 0, k = 0;
while ( j < none && k < ntwo )
if ( one[j].d < two[k].d )
dest[i++] = one[j++];
else
dest[i++] = two[k++];
if ( j == none )
for ( ; k < ntwo ; k++ )
dest[i++] = two[k];
else
for ( ; j < none ; j++ )
dest[i++] = one[j];
}
/**
* @brief inline helper fuction to merge two entry arrays (forward).
*
* @param one the first array
* @param none the length of the first array
* @param two the second array
* @param ntwo the length of the second array
* @param dest the destination array.
*/
inline void merge_backward ( struct entry *__restrict__ one , int none , struct entry *__restrict__ two , int ntwo , struct entry *__restrict__ dest ) {
int i = none + ntwo - 1, j = none - 1, k = ntwo - 1;
while ( j >= 0 && k >= 0 )
if ( one[j].d > two[k].d )
dest[i--] = one[j--];
else
dest[i--] = two[k--];
if ( j < 0 )
for ( ; k >= 0 ; k-- )
dest[i--] = two[k];
else
for ( ; j >= 0 ; j-- )
dest[i--] = one[j];
}
/** /**
* @brief Sort the particles in the given cell along all cardinal directions. * @brief Sort the particles in the given cell along all cardinal directions.
* *
...@@ -232,7 +174,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -232,7 +174,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
struct entry *fingers[8]; struct entry *fingers[8];
struct part *parts = c->parts; struct part *parts = c->parts;
int j, k, count = c->count; int j, k, count = c->count;
int cone, ctwo;
int i, ind, off[8], inds[8], temp_i; int i, ind, off[8], inds[8], temp_i;
// float shift[3]; // float shift[3];
float buff[8], px[3]; float buff[8], px[3];
...@@ -248,7 +189,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -248,7 +189,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
if ( lock_lock( &c->lock ) != 0 ) if ( lock_lock( &c->lock ) != 0 )
error( "Failed to lock cell." ); error( "Failed to lock cell." );
if ( c->sort == NULL ) if ( c->sort == NULL )
if ( ( c->sort = (struct entry *)malloc( sizeof(struct entry) * (c->count + 1) * 13 ) ) == NULL ) if ( ( c->sort = (struct entry *)malloc( sizeof(struct entry) * (c->count + 1) * 14 ) ) == NULL )
error( "Failed to allocate sort memory." ); error( "Failed to allocate sort memory." );
if ( lock_unlock( &c->lock ) != 0 ) if ( lock_unlock( &c->lock ) != 0 )
error( "Failed to unlock cell." ); error( "Failed to unlock cell." );
...@@ -256,90 +197,57 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -256,90 +197,57 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
/* Does this cell have any progeny? */ /* Does this cell have any progeny? */
if ( c->split ) { if ( c->split ) {
/* Loop over the 13 different sort arrays. */ /* Loop over the 13+1 different sort arrays. */
for ( j = 0 ; j < 13 ; j++ ) { for ( j = 0 ; j < 14 ; j++ ) {
/* Has this sort array been flagged? */ /* Has this sort array been flagged? */
if ( !( flags & (1 << j) ) ) if ( !( flags & (1 << j) ) )
continue; continue;
if ( 0 ) { /* Init the particle index offsets. */
for ( off[0] = 0 , k = 1 ; k < 8 ; k++ )
/* Get a finger on the sorting array. */ if ( c->progeny[k-1] != NULL )
finger = &c->sort[ j*(count + 1) ]; off[k] = off[k-1] + c->progeny[k-1]->count;
else
/* Merge the two first sub-cells forward into this cell. */ off[k] = off[k-1];
cone = c->progeny[0]->count;
ctwo = c->progeny[1]->count; /* Init the entries and indices. */
merge_forward( &c->progeny[0]->sort[ j*(cone + 1) ] , cone , for ( k = 0 ; k < 8 ; k++ ) {
&c->progeny[1]->sort[ j*(ctwo + 1) ] , ctwo , inds[k] = k;
finger ); if ( c->progeny[k] != NULL && c->progeny[k]->count > 0 ) {
fingers[k] = &c->progeny[k]->sort[ j*(c->progeny[k]->count + 1) ];
/* Merge-in the remaining arrays, alternating forward and buff[k] = fingers[k]->d;
backward merges. */ off[k] = off[k];
for ( k = 2 ; k < 8 ; k++ ) {
cone = cone + ctwo;
ctwo = c->progeny[k]->count;
if ( k & 1 )
merge_forward( &finger[ count - cone ] , cone ,
&c->progeny[k]->sort[ j*(ctwo + 1) ] , ctwo ,
finger );
else
merge_backward( finger , cone ,
&c->progeny[k]->sort[ j*(ctwo + 1) ] , ctwo ,
&finger[ count - cone - ctwo ] );
} }
else
buff[k] = FLT_MAX;
} }
else {
/* Init the particle index offsets. */
for ( off[0] = 0 , k = 1 ; k < 8 ; k++ )
if ( c->progeny[k-1] != NULL )
off[k] = off[k-1] + c->progeny[k-1]->count;
else
off[k] = off[k-1];
/* Init the entries and indices. */
for ( k = 0 ; k < 8 ; k++ ) {
inds[k] = k;
if ( c->progeny[k] != NULL && c->progeny[k]->count > 0 ) {
fingers[k] = &c->progeny[k]->sort[ j*(c->progeny[k]->count + 1) ];
buff[k] = fingers[k]->d;
off[k] = off[k];
}
else
buff[k] = FLT_MAX;
}
/* Sort the buffer. */ /* Sort the buffer. */
for ( i = 0 ; i < 7 ; i++ ) for ( i = 0 ; i < 7 ; i++ )
for ( k = i+1 ; k < 8 ; k++ ) for ( k = i+1 ; k < 8 ; k++ )
if ( buff[ inds[k] ] < buff[ inds[i] ] ) { if ( buff[ inds[k] ] < buff[ inds[i] ] ) {
temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i; temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i;
} }
/* For each entry in the new sort list. */ /* For each entry in the new sort list. */
finger = &c->sort[ j*(count + 1) ]; finger = &c->sort[ j*(count + 1) ];
for ( ind = 0 ; ind < count ; ind++ ) { for ( ind = 0 ; ind < count ; ind++ ) {
/* Copy the minimum into the new sort array. */ /* Copy the minimum into the new sort array. */
finger[ind].d = buff[inds[0]]; finger[ind].d = buff[inds[0]];
finger[ind].i = fingers[inds[0]]->i + off[inds[0]]; finger[ind].i = fingers[inds[0]]->i + off[inds[0]];
/* Update the buffer. */ /* Update the buffer. */
fingers[inds[0]] += 1; fingers[inds[0]] += 1;
buff[inds[0]] = fingers[inds[0]]->d; buff[inds[0]] = fingers[inds[0]]->d;
/* Find the smallest entry. */ /* Find the smallest entry. */
for ( k = 1 ; k < 8 && buff[inds[k]] < buff[inds[k-1]] ; k++ ) { for ( k = 1 ; k < 8 && buff[inds[k]] < buff[inds[k-1]] ; k++ ) {
temp_i = inds[k-1]; inds[k-1] = inds[k]; inds[k] = temp_i; temp_i = inds[k-1]; inds[k-1] = inds[k]; inds[k] = temp_i;
} }
} /* Merge. */ } /* Merge. */
}
/* Add a sentinel. */ /* Add a sentinel. */
c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX; c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX;
...@@ -349,39 +257,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -349,39 +257,6 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
} /* progeny? */ } /* progeny? */
/* Otherwise, just sort. */
// else {
//
// /* Loop over the different cell axes. */
// for ( j = 0 ; j < 13 ; j++ ) {
//
// /* Has this sort array been flagged? */
// if ( !( flags & (1 << j) ) )
// continue;
//
// /* Get the shift vector. */
// shift[0] = runner_shift[ 3*j + 0 ];
// shift[1] = runner_shift[ 3*j + 1 ];
// shift[2] = runner_shift[ 3*j + 2 ];
//
// /* Fill the sort array. */
// finger = &c->sort[ j*(count + 1) ];
// for ( k = 0 ; k < count ; k++ ) {
// finger[k].i = k;
// finger[k].d = parts[k].x[0]*shift[0] + parts[k].x[1]*shift[1] + parts[k].x[2]*shift[2];
// }
//
// /* Add the sentinel. */
// finger[ c->count ].d = FLT_MAX;
// finger[ c->count ].i = 0;
//
// /* Sort descending. */
// runner_dosort_ascending( finger , c->count );
//
// }
//
// }
/* Otherwise, just sort. */ /* Otherwise, just sort. */
else { else {
...@@ -395,10 +270,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -395,10 +270,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
c->sort[ j*(count + 1) + k].i = k; c->sort[ j*(count + 1) + k].i = k;
c->sort[ j*(count + 1) + k].d = px[0]*runner_shift[ 3*j + 0 ] + px[1]*runner_shift[ 3*j + 1 ] + px[2]*runner_shift[ 3*j + 2 ]; c->sort[ j*(count + 1) + k].d = px[0]*runner_shift[ 3*j + 0 ] + px[1]*runner_shift[ 3*j + 1 ] + px[2]*runner_shift[ 3*j + 2 ];
} }
if ( flags & (1 << 14) ) {
c->sort[ 14*(count + 1) + k ].i = k;
c->sort[ 14*(count + 1) + k ].d = parts[k].dt;
}
} }
/* Add the sentinel and sort. */ /* Add the sentinel and sort. */
for ( j = 0 ; j < 13 ; j++ ) for ( j = 0 ; j < 14 ; j++ )
if ( flags & (1 << j) ) { if ( flags & (1 << j) ) {
c->sort[ j*(count + 1) + c->count ].d = FLT_MAX; c->sort[ j*(count + 1) + c->count ].d = FLT_MAX;
c->sort[ j*(count + 1) + c->count ].i = 0; c->sort[ j*(count + 1) + c->count ].i = 0;
...@@ -505,7 +384,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) { ...@@ -505,7 +384,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p->h_dt = 0.0f; p->h_dt = 0.0f;
/* Compute this particle's time step. */ /* Compute this particle's time step. */
p->dt = const_cfl * p->h / ( const_gamma * ( const_gamma - 1.0f ) * p->u ); p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
/* Compute the pressure. */ /* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f ); // p->P = p->rho * p->u * ( const_gamma - 1.0f );
......
...@@ -88,8 +88,8 @@ extern int runner_counter[ runner_counter_count ]; ...@@ -88,8 +88,8 @@ extern int runner_counter[ runner_counter_count ];
/* Histogram functions. */ /* Histogram functions. */
#define runner_hist_a 1.0 #define runner_hist_a 1.0
#define runner_hist_b 10.0 #define runner_hist_b 1000.0
#define runner_hist_N 99 #define runner_hist_N 100
long long int runner_hist_bins[ runner_hist_N ]; long long int runner_hist_bins[ runner_hist_N ];
#define runner_hist_hit( x ) __sync_add_and_fetch( &runner_hist_bins[ (int)fmax( 0.0 , fmin( runner_hist_N-1 , ((x) - runner_hist_a) / (runner_hist_b - runner_hist_a) * runner_hist_N ) ) ] , 1 ) #define runner_hist_hit( x ) __sync_add_and_fetch( &runner_hist_bins[ (int)fmax( 0.0 , fmin( runner_hist_N-1 , ((x) - runner_hist_a) / (runner_hist_b - runner_hist_a) * runner_hist_N ) ) ] , 1 )
......
...@@ -76,6 +76,60 @@ const int sortlistID[27] = { ...@@ -76,6 +76,60 @@ const int sortlistID[27] = {
}; };
/**
* @brief Sort the particles according to the given indices.
*
* @param parts The list of #part
* @param ind The indices with respect to which the parts are sorted.
* @param N The number of parts
* @param min Lowest index.
* @param max highest index.
*
* This function calls itself recursively.
*/
void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
int pivot = (min + max) / 2;
int i = 0, j = N-1;
int temp_i;
struct part temp_p;
/* One pass of quicksort. */
while ( i < j ) {
while ( i < N && ind[i] <= pivot )
i++;
while ( j >= 0 && ind[j] > pivot )
j--;
if ( i < j ) {
temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
}
}
/* Verify sort. */
for ( int k = 0 ; k <= j ; k++ )
if ( ind[k] > pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (<=pivot)." );
}
for ( int k = j+1 ; k < N ; k++ )
if ( ind[k] <= pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (>pivot)." );
}
/* Recurse on the left? */
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
}
/** /**
* @brief Mapping function to free the sorted indices buffers. * @brief Mapping function to free the sorted indices buffers.
*/ */
...@@ -361,7 +415,7 @@ void space_splittasks ( struct space *s ) { ...@@ -361,7 +415,7 @@ void space_splittasks ( struct space *s ) {
hj = fmax( cj->h[0] , fmax( cj->h[1] , cj->h[2] ) ); hj = fmax( cj->h[0] , fmax( cj->h[1] , cj->h[2] ) );
/* Should this task be split-up? */ /* Should this task be split-up? */
if ( ci->split && cj->split && ci->h_max < hi/2 && cj->h_max < hj/2 ) { if ( ci->split && cj->split && ci->h_max*space_stretch < hi/2 && cj->h_max*space_stretch < hj/2 ) {
/* Get the relative distance between the pairs, wrapping. */ /* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
...@@ -816,7 +870,7 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -816,7 +870,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
void maketasks_rec ( struct cell *c , struct task *sort_up[] , int nr_sort_up , struct cell *parent ) { void maketasks_rec ( struct cell *c , struct task *sort_up[] , int nr_sort_up , struct cell *parent ) {
int j, k, nr_sort = 0; int j, k, nr_sort = 0;
struct task *sort[14], *t; struct task *sort[7], *t;
/* Clear the waits on this cell. */ /* Clear the waits on this cell. */
c->wait = 0; c->wait = 0;
...@@ -827,38 +881,29 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -827,38 +881,29 @@ void space_maketasks ( struct space *s , int do_sort ) {
if ( do_sort ) { if ( do_sort ) {
if ( c->count < 1000 ) { if ( c->count < 1000 ) {
sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x3fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
for ( k = 0 ; k < 13 ; k++ ) for ( k = 0 ; k < 13 ; k++ )
c->sorts[k] = sort[0]; c->sorts[k] = sort[0];
nr_sort = 1; nr_sort = 1;
} }
else if ( c->count < 5000 ) { else if ( c->count < 5000 ) {
sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0xf , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x7f , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0xf0 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x3f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x1f00 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); for ( k = 0 ; k < 7 ; k++ )
for ( k = 0 ; k < 4 ; k++ )
c->sorts[k] = sort[0]; c->sorts[k] = sort[0];
for ( k = 4 ; k < 8 ; k++ ) for ( k = 7 ; k < 14 ; k++ )
c->sorts[k] = sort[1]; c->sorts[k] = sort[1];
for ( k = 8 ; k < 13 ; k++ ) nr_sort = 2;
c->sorts[k] = sort[2];
nr_sort = 3;
} }
else { else {
c->sorts[0] = sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[0] = c->sorts[1] = sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1 + 0x2 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[1] = sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x2 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[2] = c->sorts[3] = sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x4 + 0x8 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[2] = sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x4 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[4] = c->sorts[5] = sort[2] = space_addtask( s , task_type_sort , task_subtype_none , 0x10 + 0x20 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[3] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x8 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[6] = c->sorts[7] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 + 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[4] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x10 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[8] = c->sorts[9] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 + 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[5] = sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x20 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[10] = c->sorts[11] = sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 + 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[6] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 + 0x2000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[7] = sort[7] = space_addtask( s , task_type_sort , task_subtype_none , 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); nr_sort = 7;
c->sorts[8] = sort[8] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[9] = sort[9] = space_addtask( s , task_type_sort , task_subtype_none , 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[10] = sort[10] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[11] = sort[11] = space_addtask( s , task_type_sort , task_subtype_none , 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
c->sorts[12] = sort[12] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
nr_sort = 13;
} }
} }
...@@ -1242,7 +1287,8 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , ...@@ -1242,7 +1287,8 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
int nr_cells, cdim[3]; int nr_cells, cdim[3];
double h_min, h_max, h[3], ih[3]; double h_min, h_max, h[3], ih[3];
struct cell *c, *cells; struct cell *c, *cells;
struct part *parts_new, *finger; struct part *finger;
int *ind;
/* Get the minimum and maximum cutoff radii. */ /* Get the minimum and maximum cutoff radii. */
...@@ -1253,6 +1299,9 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , ...@@ -1253,6 +1299,9 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
else if ( parts[k].h > h_max ) else if ( parts[k].h > h_max )
h_max = parts[k].h; h_max = parts[k].h;
/* Stretch the maximum smoothing length. */
h_max *= space_stretch;
/* Get the cell width. */ /* Get the cell width. */
if ( h_cells < h_max ) if ( h_cells < h_max )
h_cells = h_max; h_cells = h_max;
...@@ -1280,31 +1329,29 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , ...@@ -1280,31 +1329,29 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
c->h[0] = h[0]; c->h[1] = h[1]; c->h[2] = h[2]; c->h[0] = h[0]; c->h[1] = h[1]; c->h[2] = h[2];
} }
/* Run through the particles and get the counts for each cell. */ /* Run through the particles and get their cell index. */
for ( k = 0 ; k < N ; k++ ) ind = (int *)alloca( sizeof(int) * N );
cells[ cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ].count += 1; for ( k = 0 ; k < N ; k++ ) {
ind[k] = cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] );
cells[ ind[k] ].count += 1;
}
/* Allocate the new part buffer and set the part pointers in each cell. */ /* Sort the parts according to their cells. */
if ( posix_memalign( (void *)&parts_new , 64 , N * sizeof(struct part) ) != 0 ) parts_sort( parts , ind , N , 0 , nr_cells );
error( "Failed to allocate parts." );
for ( finger = parts_new , k = 0 ; k < nr_cells ; k++ ) { /* Hook the cells up to the parts. */
for ( finger = parts , k = 0 ; k < nr_cells ; k++ ) {
c = &cells[ k ]; c = &cells[ k ];
c->parts = finger; c->parts = finger;
finger = &finger[ c->count ]; finger = &finger[ c->count ];
c->count = 0;
}
for ( k = 0 ; k < N ; k++ ) {
c = &cells[ cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ];
c->parts[ c->count ] = parts[k];
c->count += 1;
} }
/* Store eveything in the space. */ /* Store eveything in the space. */
s->h_min = h_min; s->h_max = h_max; s->h_min = h_min; s->h_max = h_max;
s->dim[0] = dim[0]; s->dim[1] = dim[1]; s->dim[2] = dim[2]; s->dim[0] = dim[0]; s->dim[1] = dim[1]; s->dim[2] = dim[2];
s->periodic = periodic; s->periodic = periodic;
s->parts = parts_new;
s->nr_parts = N; s->nr_parts = N;
s->parts = parts;
s->h[0] = h[0]; s->h[1] = h[1]; s->h[2] = h[2]; s->h[0] = h[0]; s->h[1] = h[1]; s->h[2] = h[2];
s->ih[0] = ih[0]; s->ih[1] = ih[1]; s->ih[2] = ih[2]; s->ih[0] = ih[0]; s->ih[1] = ih[1]; s->ih[2] = ih[2];
s->cdim[0] = cdim[0]; s->cdim[1] = cdim[1]; s->cdim[2] = cdim[2]; s->cdim[0] = cdim[0]; s->cdim[1] = cdim[1]; s->cdim[2] = cdim[2];
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#define space_splitratio 0.875 #define space_splitratio 0.875
#define space_splitsize_default 400 #define space_splitsize_default 400
#define space_dosub 1 #define space_dosub 1
#define space_stretch 1.0
/* Convert cell location to ID. */ /* Convert cell location to ID. */
...@@ -57,6 +58,9 @@ struct space { ...@@ -57,6 +58,9 @@ struct space {
/* The minimum and maximum cutoff radii. */ /* The minimum and maximum cutoff radii. */
double h_min, h_max; double h_min, h_max;
/* Current time step for particles. */
float dt;
/* Number of cells. */ /* Number of cells. */
int nr_cells, tot_cells; int nr_cells, tot_cells;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment