Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
0c135c44
Commit
0c135c44
authored
Jun 24, 2013
by
Pedro Gonnet
Browse files
Limit the number of threads in the memory-bound parts of the code.
Former-commit-id: db4d3aa9d24dc019f91f77c56f5a2884997b7c94
parent
58bc2643
Changes
6
Hide whitespace changes
Inline
Side-by-side
src/Makefile.am
View file @
0c135c44
...
...
@@ -22,7 +22,7 @@ AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS
=
-g
-O3
-Wall
-Werror
-ffast-math
-fstrict-aliasing
-ftree-vectorize
\
-funroll-loops
$(SIMD_FLAGS)
$(OPENMP_CFLAGS)
\
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.40e9
-DTIMER
-DCOUNTER
-DCPU_TPS
=
2.40e9
-mfma4
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
...
...
src/engine.c
View file @
0c135c44
...
...
@@ -326,7 +326,7 @@ void engine_prepare ( struct engine *e ) {
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
rebuild
=
(
e
->
step
==
0
||
engine_marktasks
(
e
)
);
rebuild
=
1
||
(
e
->
step
==
0
||
engine_marktasks
(
e
)
);
// printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */
...
...
src/scheduler.c
View file @
0c135c44
...
...
@@ -625,7 +625,7 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) {
void
scheduler_enqueue
(
struct
scheduler
*
s
,
struct
task
*
t
)
{
int
k
,
qid
=
-
1
;
int
qid
=
-
1
;
/* Ignore skipped tasks. */
if
(
t
->
skip
)
...
...
@@ -652,9 +652,10 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
/* If no previous owner, find the shortest queue. */
if
(
qid
<
0
)
for
(
qid
=
0
,
k
=
1
;
k
<
s
->
nr_queues
;
k
++
)
qid
=
rand
()
%
s
->
nr_queues
;
/* for ( qid = 0 , int k = 1 ; k < s->nr_queues ; k++ )
if ( s->queues[k].count < s->queues[qid].count )
qid
=
k
;
qid = k;
*/
/* Increase the waiting counter. */
atomic_inc
(
&
s
->
waiting
);
...
...
@@ -674,7 +675,7 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
void
scheduler_done
(
struct
scheduler
*
s
,
struct
task
*
t
)
{
int
k
;
int
k
,
res
;
struct
task
*
t2
;
/* Release whatever locks this task held. */
...
...
@@ -695,15 +696,17 @@ void scheduler_done ( struct scheduler *s , struct task *t ) {
they are ready. */
for
(
k
=
0
;
k
<
t
->
nr_unlock_tasks
;
k
++
)
{
t2
=
t
->
unlock_tasks
[
k
];
if
(
atomic_dec
(
&
t2
->
wait
)
==
1
&&
!
t2
->
skip
)
if
(
(
res
=
atomic_dec
(
&
t2
->
wait
)
)
<
1
)
error
(
"Negative wait!"
);
if
(
res
==
1
&&
!
t2
->
skip
)
scheduler_enqueue
(
s
,
t2
);
}
/* Task definitely done. */
pthread_mutex_lock
(
&
s
->
sleep_mutex
);
//
pthread_mutex_lock( &s->sleep_mutex );
atomic_dec
(
&
s
->
waiting
);
pthread_cond_broadcast
(
&
s
->
sleep_cond
);
pthread_mutex_unlock
(
&
s
->
sleep_mutex
);
//
pthread_cond_broadcast( &s->sleep_cond );
//
pthread_mutex_unlock( &s->sleep_mutex );
}
...
...
@@ -751,10 +754,10 @@ struct task *scheduler_gettask ( struct scheduler *s , int qid ) {
}
/* If we failed, take a short nap. */
pthread_mutex_lock
(
&
s
->
sleep_mutex
);
/*
pthread_mutex_lock( &s->sleep_mutex );
if ( s->waiting > 0 )
pthread_cond_wait( &s->sleep_cond , &s->sleep_mutex );
pthread_mutex_unlock
(
&
s
->
sleep_mutex
);
pthread_mutex_unlock( &s->sleep_mutex );
*/
}
...
...
src/scheduler.h
View file @
0c135c44
...
...
@@ -27,7 +27,7 @@
/* Flags . */
#define scheduler_flag_none 0
#define scheduler_flag_steal 1
#define scheduler_flag_maxsteal
2
#define scheduler_flag_maxsteal
1
/* Data of a scheduler. */
...
...
src/space.c
View file @
0c135c44
...
...
@@ -161,7 +161,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
struct
part
*
restrict
finger
,
*
restrict
p
,
*
parts
=
s
->
parts
;
int
*
ind
;
double
ih
[
3
],
dim
[
3
];
//
ticks tic;
ticks
tic
;
/* Be verbose about this. */
printf
(
"space_rebuild: (re)building space...
\n
"
);
fflush
(
stdout
);
...
...
@@ -252,6 +252,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
s
->
cells
[
k
].
dx_max
=
0
.
0
f
;
s
->
cells
[
k
].
sorted
=
0
;
s
->
cells
[
k
].
count
=
0
;
s
->
cells
[
k
].
kick1
=
NULL
;
s
->
cells
[
k
].
kick2
=
NULL
;
}
s
->
maxdepth
=
0
;
...
...
@@ -259,7 +260,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
}
/* Run through the particles and get their cell index. */
//
tic = getticks();
tic
=
getticks
();
if
(
(
ind
=
(
int
*
)
malloc
(
sizeof
(
int
)
*
s
->
nr_parts
)
)
==
NULL
)
error
(
"Failed to allocate temporary particle indices."
);
ih
[
0
]
=
s
->
ih
[
0
];
ih
[
1
]
=
s
->
ih
[
1
];
ih
[
2
]
=
s
->
ih
[
2
];
...
...
@@ -276,12 +277,12 @@ void space_rebuild ( struct space *s , double cell_max ) {
ind
[
k
]
=
cell_getid
(
cdim
,
p
->
x
[
0
]
*
ih
[
0
]
,
p
->
x
[
1
]
*
ih
[
1
]
,
p
->
x
[
2
]
*
ih
[
2
]
);
atomic_inc
(
&
s
->
cells
[
ind
[
k
]
].
count
);
}
//
printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf
(
"space_rebuild: getting particle indices took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Sort the parts according to their cells. */
//
tic = getticks();
tic
=
getticks
();
parts_sort
(
parts
,
ind
,
s
->
nr_parts
,
0
,
s
->
nr_cells
-
1
);
//
printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf
(
"space_rebuild: parts_sort took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Verify sort. */
/* for ( k = 1 ; k < nr_parts ; k++ ) {
...
...
@@ -307,21 +308,19 @@ void space_rebuild ( struct space *s , double cell_max ) {
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
//
tic = getticks();
tic
=
getticks
();
k
=
0
;
#pragma omp parallel shared(s,k)
#pragma omp parallel
num_threads(8)
shared(s,k)
{
while
(
1
)
{
int
myk
;
#pragma omp critical
myk
=
k
++
;
int
myk
=
atomic_inc
(
&
k
);
if
(
myk
<
s
->
nr_cells
)
space_split
(
s
,
&
s
->
cells
[
myk
]
);
else
break
;
}
}
//
printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf
(
"space_rebuild: space_rebuild_recurse took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
}
...
...
@@ -358,7 +357,7 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
first
=
0
;
last
=
1
;
waiting
=
1
;
/* Parallel bit. */
#pragma omp parallel default(shared) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
#pragma omp parallel
num_threads(8)
default(shared) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
{
/* Main loop. */
...
...
@@ -565,11 +564,11 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel shared(s,cid)
//
#pragma omp parallel shared(s,cid)
{
int
mycid
;
while
(
1
)
{
#pragma omp critical
//
#pragma omp critical
mycid
=
cid
++
;
if
(
mycid
<
s
->
nr_cells
)
rec_map
(
&
s
->
cells
[
mycid
]
);
...
...
@@ -602,11 +601,11 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel shared(s,cid)
//
#pragma omp parallel shared(s,cid)
{
int
mycid
;
while
(
1
)
{
#pragma omp critical
//
#pragma omp critical
mycid
=
cid
++
;
if
(
mycid
<
s
->
nr_cells
)
rec_map
(
&
s
->
cells
[
mycid
]
);
...
...
@@ -790,15 +789,22 @@ struct cell *space_getcell ( struct space *s ) {
s
->
cells_new
=
c
->
next
;
s
->
tot_cells
+=
1
;
/* Unlock the space. */
lock_unlock_blind
(
&
s
->
lock
);
/* Init some things in the cell. */
bzero
(
c
,
sizeof
(
struct
cell
)
);
c
->
sorts
=
NULL
;
c
->
nr_tasks
=
0
;
c
->
nr_density
=
0
;
c
->
dx_max
=
0
.
0
f
;
c
->
sorted
=
0
;
c
->
count
=
0
;
c
->
kick1
=
NULL
;
c
->
kick2
=
NULL
;
if
(
lock_init
(
&
c
->
lock
)
!=
0
)
error
(
"Failed to initialize cell spinlock."
);
c
->
owner
=
-
1
;
/* Unlock the space. */
lock_unlock_blind
(
&
s
->
lock
);
return
c
;
}
...
...
src/space.h
View file @
0c135c44
...
...
@@ -26,8 +26,8 @@
#define space_splitratio 0.875f
#define space_splitsize_default 400
#define space_subsize_default 5000
#define space_stretch 1.0
5
f
#define space_maxreldx 0.2f
#define space_stretch 1.
1
0f
#define space_maxreldx 0.2
5
f
#define space_qstack 1000
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment