Commit 14b47f55 authored by Pedro Gonnet
Browse files

better reporting of partition stuff.


Former-commit-id: 0f606eea6be16d026b110ca8d5f855bfd066c2fe
parent e976799c
...@@ -94,6 +94,7 @@ void engine_redistribute ( struct engine *e ) { ...@@ -94,6 +94,7 @@ void engine_redistribute ( struct engine *e ) {
int my_cells = 0; int my_cells = 0;
int *cdim = s->cdim; int *cdim = s->cdim;
struct cell *cells = s->cells; struct cell *cells = s->cells;
int nr_cells = s->nr_cells;
/* Start by sorting the particles according to their nodes and /* Start by sorting the particles according to their nodes and
getting the counts. */ getting the counts. */
...@@ -175,6 +176,9 @@ void engine_redistribute ( struct engine *e ) { ...@@ -175,6 +176,9 @@ void engine_redistribute ( struct engine *e ) {
s->size_parts = 2*nr_parts; s->size_parts = 2*nr_parts;
/* Be verbose about what just happened. */ /* Be verbose about what just happened. */
for ( k = 0 ; k < nr_cells ; k++ )
if ( cells[k].nodeID == nodeID )
my_cells += 1;
message( "node %i now has %i parts in %i cells." , nodeID , nr_parts , my_cells ); message( "node %i now has %i parts in %i cells." , nodeID , nr_parts , my_cells );
/* Clean up other stuff. */ /* Clean up other stuff. */
...@@ -249,8 +253,12 @@ void engine_repartition ( struct engine *e ) { ...@@ -249,8 +253,12 @@ void engine_repartition ( struct engine *e ) {
} }
/* Init the weights arrays. */ /* Init the weights arrays. */
bzero( weights_e , sizeof(idx_t) * 26*nr_cells ); /* bzero( weights_e , sizeof(idx_t) * 26*nr_cells );
bzero( weights_v , sizeof(idx_t) * nr_cells ); bzero( weights_v , sizeof(idx_t) * nr_cells ); */
for ( k = 0 ; k < 26*nr_nodes ; k++ )
weights_e[k] = 1;
for ( k = 0 ; k < nr_nodes ; k++ )
weights_v[k] = 1;
/* Loop over the tasks... */ /* Loop over the tasks... */
for ( j = 0 ; j < e->sched.nr_tasks ; j++ ) { for ( j = 0 ; j < e->sched.nr_tasks ; j++ ) {
......
...@@ -27,7 +27,38 @@ ...@@ -27,7 +27,38 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type #define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */ /* So what will the vector size be? */
#ifdef __AVX__ #ifdef __MIC__
#define VECTORIZE
#define VEC_HAVE_GATHER
#define VEC_SIZE 16
#define VEC_FLOAT __m512
#define VEC_DBL __m512d
#define VEC_INT __m512i
#define vec_load(a) _mm512_load_ps(a)
#define vec_set1(a) _mm512_set1_ps(a)
#define vec_set(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) _mm512_set_ps(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a)
#define vec_dbl_set(a,b,c,d,e,f,g,h) _mm512_set_pd(h,g,f,e,d,c,b,a)
#define vec_sqrt(a) _mm512_sqrt_ps(a)
#define vec_rcp(a) _mm512_rcp_ps(a)
#define vec_rsqrt(a) _mm512_rsqrt_ps(a)
#define vec_ftoi(a) _mm512_cvttps_epi32(a)
#define vec_fmin(a,b) _mm512_min_ps(a,b)
#define vec_fmax(a,b) _mm512_max_ps(a,b)
#define vec_fabs(a) _mm512_andnot_ps(_mm512_set1_ps(-0.f), a)
#define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a,0))
#define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a,1))
#define vec_dbl_tofloat(a,b) _mm512_insertf128( _mm512_castps128_ps512(a) , b , 1 )
#define vec_dbl_load(a) _mm512_load_pd(a)
#define vec_dbl_set1(a) _mm512_set1_pd(a)
#define vec_dbl_sqrt(a) _mm512_sqrt_pd(a)
#define vec_dbl_rcp(a) _mm512_rcp_pd(a)
#define vec_dbl_rsqrt(a) _mm512_rsqrt_pd(a)
#define vec_dbl_ftoi(a) _mm512_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm512_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm512_max_pd(a,b)
#define vec_getoffsets(ptrs) _mm512_insertf64x4( _mm512_insertf64x4( _mm512_setzero_pd() , _mm512_cvtepi64_epi32( _mm512_load_epi64(ptrs) - _mm512_set1_epi64(ptrs[0]) ) , 0 ) , _mm512_cvtepi64_epi32( _mm512_load_epi64(&ptrs[4]) - _mm512_set1_epi64(ptrs[0]) ) , 1 )
#define vec_gather(base,offsets) _mm512_i32gather_ps( offsets.m , base , 1 )
#elif defined(__AVX__)
#define VECTORIZE #define VECTORIZE
#define VEC_SIZE 8 #define VEC_SIZE 8
#define VEC_FLOAT __m256 #define VEC_FLOAT __m256
...@@ -55,6 +86,10 @@ ...@@ -55,6 +86,10 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a) #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b) #define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b) #define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
#ifdef __AVX2__
#define VEC_HAVE_GATHER
#define vec_gather(base,offsets) _mm256_i32gather_ps( base , offsets.m , 1 )
#endif
#elif defined( __SSE2__ ) #elif defined( __SSE2__ )
#define VECTORIZE #define VECTORIZE
#define VEC_SIZE 4 #define VEC_SIZE 4
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment