Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
14b47f55
Commit
14b47f55
authored
11 years ago
by
Pedro Gonnet
Browse files
Options
Downloads
Patches
Plain Diff
better reporting of partition stuff.
Former-commit-id: 0f606eea6be16d026b110ca8d5f855bfd066c2fe
parent
e976799c
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/engine.c
+10
-2
10 additions, 2 deletions
src/engine.c
src/vector.h
+36
-1
36 additions, 1 deletion
src/vector.h
with
46 additions
and
3 deletions
src/engine.c
+
10
−
2
View file @
14b47f55
...
...
@@ -94,6 +94,7 @@ void engine_redistribute ( struct engine *e ) {
int
my_cells
=
0
;
int
*
cdim
=
s
->
cdim
;
struct
cell
*
cells
=
s
->
cells
;
int
nr_cells
=
s
->
nr_cells
;
/* Start by sorting the particles according to their nodes and
getting the counts. */
...
...
@@ -175,6 +176,9 @@ void engine_redistribute ( struct engine *e ) {
s
->
size_parts
=
2
*
nr_parts
;
/* Be verbose about what just happened. */
for
(
k
=
0
;
k
<
nr_cells
;
k
++
)
if
(
cells
[
k
].
nodeID
==
nodeID
)
my_cells
+=
1
;
message
(
"node %i now has %i parts in %i cells."
,
nodeID
,
nr_parts
,
my_cells
);
/* Clean up other stuff. */
...
...
@@ -249,8 +253,12 @@ void engine_repartition ( struct engine *e ) {
}
/* Init the weights arrays. */
bzero
(
weights_e
,
sizeof
(
idx_t
)
*
26
*
nr_cells
);
bzero
(
weights_v
,
sizeof
(
idx_t
)
*
nr_cells
);
/* bzero( weights_e , sizeof(idx_t) * 26*nr_cells );
bzero( weights_v , sizeof(idx_t) * nr_cells ); */
for
(
k
=
0
;
k
<
26
*
nr_nodes
;
k
++
)
weights_e
[
k
]
=
1
;
for
(
k
=
0
;
k
<
nr_nodes
;
k
++
)
weights_v
[
k
]
=
1
;
/* Loop over the tasks... */
for
(
j
=
0
;
j
<
e
->
sched
.
nr_tasks
;
j
++
)
{
...
...
This diff is collapsed.
Click to expand it.
src/vector.h
+
36
−
1
View file @
14b47f55
...
...
@@ -27,7 +27,38 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */
#ifdef __AVX__
#ifdef __MIC__
/* 512-bit vector configuration (selected by the enclosing __MIC__ test):
   VEC_SIZE is 16, matching the number of single-precision lanes in the
   __m512 type used for VEC_FLOAT. VEC_HAVE_GATHER advertises that
   vec_gather is defined for this configuration. */
#define VECTORIZE
#define VEC_HAVE_GATHER
#define VEC_SIZE 16
#define VEC_FLOAT __m512
#define VEC_DBL __m512d
#define VEC_INT __m512i
/* Single-precision wrappers. */
#define vec_load(a) _mm512_load_ps(a)
#define vec_set1(a) _mm512_set1_ps(a)
/* vec_set / vec_dbl_set forward their arguments to the intrinsic in
   reverse order, since the _mm512_set_* intrinsics list the highest
   element first; the first macro argument therefore lands in the
   lowest lane. */
#define vec_set(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) _mm512_set_ps(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a)
#define vec_dbl_set(a,b,c,d,e,f,g,h) _mm512_set_pd(h,g,f,e,d,c,b,a)
#define vec_sqrt(a) _mm512_sqrt_ps(a)
#define vec_rcp(a) _mm512_rcp_ps(a)
#define vec_rsqrt(a) _mm512_rsqrt_ps(a)
#define vec_ftoi(a) _mm512_cvttps_epi32(a)
#define vec_fmin(a,b) _mm512_min_ps(a,b)
#define vec_fmax(a,b) _mm512_max_ps(a,b)
/* Absolute value: and-not with the bit pattern of -0.f, i.e. every lane
   is masked with the complement of the sign bit. */
#define vec_fabs(a) _mm512_andnot_ps(_mm512_set1_ps(-0.f), a)
/* NOTE(review): _mm512_extract128_ps and _mm512_insertf128 look modelled
   on the 256-bit AVX intrinsic names -- confirm that the target compiler
   provides them and that the extracted/inserted widths are the intended
   halves of a 512-bit vector. */
#define vec_todbl_lo(a) _mm512_cvtps_pd(_mm512_extract128_ps(a,0))
#define vec_todbl_hi(a) _mm512_cvtps_pd(_mm512_extract128_ps(a,1))
#define vec_dbl_tofloat(a,b) _mm512_insertf128( _mm512_castps128_ps512(a) , b , 1 )
/* Double-precision wrappers. */
#define vec_dbl_load(a) _mm512_load_pd(a)
#define vec_dbl_set1(a) _mm512_set1_pd(a)
#define vec_dbl_sqrt(a) _mm512_sqrt_pd(a)
#define vec_dbl_rcp(a) _mm512_rcp_pd(a)
#define vec_dbl_rsqrt(a) _mm512_rsqrt_pd(a)
#define vec_dbl_ftoi(a) _mm512_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm512_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm512_max_pd(a,b)
/* NOTE(review): presumably turns an array of pointers into 32-bit offsets
   relative to ptrs[0] for use with vec_gather. The two loads start at ptrs
   and &ptrs[4], and the subtraction uses the '-' operator directly on the
   integer vector type -- verify that this covers all VEC_SIZE lanes and
   compiles with the intended toolchain. */
#define vec_getoffsets(ptrs) _mm512_insertf64x4( _mm512_insertf64x4( _mm512_setzero_pd() , _mm512_cvtepi64_epi32( _mm512_load_epi64(ptrs) - _mm512_set1_epi64(ptrs[0]) ) , 0 ) , _mm512_cvtepi64_epi32( _mm512_load_epi64(&ptrs[4]) - _mm512_set1_epi64(ptrs[0]) ) , 1 )
/* Gather: the index vector (the .m member of offsets) comes first, then
   the base address. The scale argument is 1, so the indices are used as
   byte offsets from base. */
#define vec_gather(base,offsets) _mm512_i32gather_ps( offsets.m , base , 1 )
#elif defined(__AVX__)
#define VECTORIZE
#define VEC_SIZE 8
#define VEC_FLOAT __m256
...
...
@@ -55,6 +86,10 @@
#define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
#define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
#define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
/* Gather support is only offered when __AVX2__ is defined; callers can
   test VEC_HAVE_GATHER before using vec_gather. The intrinsic takes the
   base address first and the index vector (the .m member of offsets)
   second; the scale argument of 1 means the indices are used as byte
   offsets from base. */
#ifdef __AVX2__
#define VEC_HAVE_GATHER
#define vec_gather(base,offsets) _mm256_i32gather_ps( base , offsets.m , 1 )
#endif
#elif defined( __SSE2__ )
#define VECTORIZE
#define VEC_SIZE 4
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment