Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
1caa0a07
Commit
1caa0a07
authored
9 years ago
by
Angus Lepper
Browse files
Options
Downloads
Patches
Plain Diff
Don't overstep the affinity we're given at entry
parent
22234b65
No related branches found
No related tags found
1 merge request
!120
Simplify, permit user control over affinity
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
examples/main.c
+2
-10
2 additions, 10 deletions
examples/main.c
src/engine.c
+81
-25
81 additions, 25 deletions
src/engine.c
src/engine.h
+1
-0
1 addition, 0 deletions
src/engine.h
with
84 additions
and
35 deletions
examples/main.c
+
2
−
10
View file @
1caa0a07
...
...
@@ -138,17 +138,9 @@ int main(int argc, char *argv[]) {
if
((
ENGINE_POLICY
)
&
engine_policy_setaffinity
)
{
/* Ensure the NUMA node on which we initialise (first touch) everything
* doesn't change before engine_init allocates NUMA-local workers.
* Otherwise,
* we may be scheduled elsewhere between the two times.
* Otherwise, we may be scheduled elsewhere between the two times.
*/
cpu_set_t
affinity
;
CPU_ZERO
(
&
affinity
);
CPU_SET
(
sched_getcpu
(),
&
affinity
);
if
(
sched_setaffinity
(
0
,
sizeof
(
cpu_set_t
),
&
affinity
)
!=
0
)
{
message
(
"failed to set entry thread's affinity"
);
}
else
{
message
(
"set entry thread's affinity"
);
}
engine_pin
();
}
#endif
...
...
This diff is collapsed.
Click to expand it.
src/engine.c
+
81
−
25
View file @
1caa0a07
...
...
@@ -1925,6 +1925,40 @@ static bool hyperthreads_present(void) {
}
#endif
#ifdef HAVE_SETAFFINITY
/* CPU affinity mask cached by engine_entry_affinity(). */
static cpu_set_t entry_affinity;

/* Set once entry_affinity has been filled in successfully. */
static bool use_entry_affinity = false;

/**
 * @brief Get the affinity mask the calling thread had on first use.
 *
 * The mask is queried with pthread_getaffinity_np() the first time this
 * function is called and cached in the file-scope entry_affinity; later
 * calls return the cached mask without querying again.
 *
 * If the query fails (pthread_getaffinity_np() returns a non-zero error
 * number, e.g. when the kernel's CPU mask does not fit in a cpu_set_t) the
 * failure is reported on stderr and the mask is NOT marked as cached, so
 * the next call tries again instead of treating a failed query as valid.
 *
 * @return Pointer to the file-scope mask; never NULL.
 */
static cpu_set_t *engine_entry_affinity(void) {
  if (!use_entry_affinity) {
    const int rc = pthread_getaffinity_np(pthread_self(),
                                          sizeof entry_affinity,
                                          &entry_affinity);
    if (rc == 0) {
      use_entry_affinity = true;
    } else {
      fprintf(stderr,
              "engine_entry_affinity: pthread_getaffinity_np failed "
              "(error %d)\n",
              rc);
    }
  }
  return &entry_affinity;
}
/**
 * @brief Pin the caller to the first CPU permitted by the entry affinity.
 *
 * Looks up the affinity mask returned by engine_entry_affinity(), picks the
 * lowest-numbered CPU set in it and restricts the caller (pid 0 in
 * sched_setaffinity()) to that single CPU.
 *
 * Reports through error() if the entry mask contains no CPU at all, or if
 * sched_setaffinity() rejects the new mask.
 */
void engine_pin(void) {
  cpu_set_t *allowed = engine_entry_affinity();

  /* Find the lowest-numbered CPU present in the entry mask. */
  int pin = 0;
  while (pin < CPU_SETSIZE && !CPU_ISSET(pin, allowed)) {
    ++pin;
  }

  /* An empty mask leaves nothing to pin to; do not hand CPU_SET an
   * out-of-range index. */
  if (pin == CPU_SETSIZE) {
    error("no CPU available in engine's entry affinity");
    return;
  }

  cpu_set_t affinity;
  CPU_ZERO(&affinity);
  CPU_SET(pin, &affinity);
  if (sched_setaffinity(0, sizeof affinity, &affinity) != 0) {
    error("failed to set engine's affinity");
  }
}
/**
 * @brief Restore the calling thread's affinity to the cached entry mask.
 *
 * Applies the file-scope entry_affinity mask to the calling thread with
 * pthread_setaffinity_np(). The mask is read directly, so it is only
 * meaningful after engine_entry_affinity() has been called at least once.
 *
 * A failure is reported on stderr but is not fatal: the thread simply keeps
 * whatever affinity it had before the call.
 */
static void engine_unpin(void) {
  const int rc = pthread_setaffinity_np(pthread_self(), sizeof entry_affinity,
                                        &entry_affinity);
  if (rc != 0) {
    fprintf(stderr,
            "engine_unpin: failed to restore entry affinity (error %d)\n",
            rc);
  }
}
#endif
/**
* @brief init an engine with the given number of threads, queues, and
* the given policy.
...
...
@@ -1982,22 +2016,39 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
#if defined(HAVE_SETAFFINITY)
const
int
nr_cores
=
sysconf
(
_SC_NPROCESSORS_ONLN
);
int
cpuid
[
nr_cores
];
cpu_set_t
*
entry_affinity
=
engine_entry_affinity
();
const
int
nr_affinity_cores
=
CPU_COUNT
(
entry_affinity
);
if
(
nr_cores
>
CPU_SETSIZE
)
{
// Unlikely, except on e.g. SGI UV.
error
(
"must allocate dynamic cpu_set_t (too many cores per node)"
);
}
char
buf
[
nr_cores
+
1
];
buf
[
nr_cores
]
=
'\0'
;
for
(
int
j
=
0
;
j
<
nr_cores
;
++
j
)
{
/* Reversed bit order from convention, but same as e.g. Intel MPI's
* I_MPI_PIN_DOMAIN explicit mask: left-to-right, LSB-to-MSB.
*/
buf
[
j
]
=
CPU_ISSET
(
j
,
entry_affinity
)
?
'1'
:
'0'
;
}
message
(
"affinity at entry: %s"
,
buf
);
#endif
int
cpuid
[
nr_affinity_cores
];
cpu_set_t
cpuset
;
if
((
policy
&
engine_policy_cputight
)
==
engine_policy_cputight
)
{
for
(
int
k
=
0
;
k
<
nr_cores
;
k
++
)
cpuid
[
k
]
=
k
;
}
else
{
/* Get next highest power of 2. */
int
maxint
=
1
;
while
(
maxint
<
nr_cores
)
maxint
*=
2
;
cpuid
[
0
]
=
0
;
int
k
=
1
;
for
(
int
i
=
1
;
i
<
maxint
;
i
*=
2
)
for
(
int
j
=
maxint
/
i
/
2
;
j
<
maxint
;
j
+=
maxint
/
i
)
if
(
j
<
nr_cores
&&
j
!=
0
)
cpuid
[
k
++
]
=
j
;
int
skip
=
0
;
for
(
int
k
=
0
;
k
<
nr_affinity_cores
;
k
++
)
{
int
c
;
for
(
c
=
skip
;
c
<
CPU_SETSIZE
&&
!
CPU_ISSET
(
c
,
entry_affinity
);
++
c
);
cpuid
[
k
]
=
c
;
skip
=
c
+
1
;
}
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
if
((
policy
&
engine_policy_cputight
)
!=
engine_policy_cputight
)
{
/* Ascending NUMA distance. Bubblesort(!) for stable equidistant CPUs. */
if
(
numa_available
()
>=
0
)
{
if
(
nodeID
==
0
)
message
(
"prefer NUMA-local CPUs"
);
...
...
@@ -2011,7 +2062,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
while
(
!
done
)
{
done
=
true
;
for
(
int
i
=
1
;
i
<
nr_cores
;
i
++
)
{
for
(
int
i
=
1
;
i
<
nr_
affinity_
cores
;
i
++
)
{
const
int
node_a
=
numa_node_of_cpu
(
cpuid
[
i
-
1
]);
const
int
node_b
=
numa_node_of_cpu
(
cpuid
[
i
]);
...
...
@@ -2034,20 +2085,24 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
}
}
}
}
#endif
if
(
nodeID
==
0
)
{
/* Avoid (unexpected) interference between engine and runner threads. We can
* do this once we've made at least one call to engine_entry_affinity and
* maybe numa_node_of_cpu(sched_getcpu()), even if the engine isn't already
* pinned.
*/
engine_unpin
();
#ifdef WITH_MPI
printf
(
"[%04i] %s engine_init: cpu map is [ "
,
nodeID
,
clocks_get_timesincestart
());
printf
(
"[%04i] %s engine_init: cpu map is [ "
,
nodeID
,
clocks_get_timesincestart
());
#else
printf
(
"%s engine_init: cpu map is [ "
,
clocks_get_timesincestart
());
#endif
for
(
int
i
=
0
;
i
<
nr_cores
;
i
++
)
printf
(
"%i "
,
cpuid
[
i
]);
printf
(
"].
\n
"
);
}
}
printf
(
"%s engine_init: cpu map is [ "
,
clocks_get_timesincestart
());
#endif
for
(
int
i
=
0
;
i
<
nr_affinity_cores
;
i
++
)
printf
(
"%i "
,
cpuid
[
i
]);
printf
(
"].
\n
"
);
/* Are we doing stuff in parallel? */
if
(
nr_nodes
>
1
)
{
...
...
@@ -2176,15 +2231,16 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
#if defined(HAVE_SETAFFINITY)
/* Set a reasonable queue ID. */
e
->
runners
[
k
].
cpuid
=
cpuid
[
k
%
nr_cores
];
int
coreid
=
k
%
nr_affinity_cores
;
e
->
runners
[
k
].
cpuid
=
cpuid
[
coreid
];
if
(
nr_queues
<
nr_threads
)
e
->
runners
[
k
].
qid
=
cpuid
[
k
%
nr_
core
s
]
*
nr_queues
/
nr_cores
;
e
->
runners
[
k
].
qid
=
cpuid
[
core
id
]
*
nr_queues
/
nr_
affinity_
cores
;
else
e
->
runners
[
k
].
qid
=
k
;
/* Set the cpu mask to zero | e->id. */
CPU_ZERO
(
&
cpuset
);
CPU_SET
(
cpuid
[
k
%
nr_
core
s
],
&
cpuset
);
CPU_SET
(
cpuid
[
core
id
],
&
cpuset
);
/* Apply this mask to the runner's pthread. */
if
(
pthread_setaffinity_np
(
e
->
runners
[
k
].
thread
,
sizeof
(
cpu_set_t
),
...
...
This diff is collapsed.
Click to expand it.
src/engine.h
+
1
−
0
View file @
1caa0a07
...
...
@@ -170,6 +170,7 @@ struct engine {
/* Function prototypes. */
void
engine_barrier
(
struct
engine
*
e
,
int
tid
);
void
engine_pin
(
void
);
void
engine_init
(
struct
engine
*
e
,
struct
space
*
s
,
float
dt
,
int
nr_threads
,
int
nr_queues
,
int
nr_nodes
,
int
nodeID
,
int
policy
,
float
timeBegin
,
float
timeEnd
,
float
dt_min
,
float
dt_max
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment