Skip to content
Snippets Groups Projects
Commit 14418b1d authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'numa_affinity' into 'master'

NUMA-aware affinity

Resolves #76. Yes, ```hyperthreads_present``` is bonkers - it's easy to detect whether the processor supports the technology, but this is the best I've found to tell whether it's enabled.

See merge request !78
parents 62727f47 b564d32d
Branches
Tags
1 merge request!136Master
......@@ -270,6 +270,9 @@ AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[true],
AM_CONDITIONAL(HAVESETAFFINITY,
[test "$ac_cv_func_pthread_setaffinity_np" = "yes"])
# Check for libnuma.
AC_CHECK_LIB([numa], [numa_available])
# Check for timing functions needed by cycle.h.
AC_HEADER_TIME
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
......
......@@ -116,6 +116,21 @@ int main(int argc, char *argv[]) {
/* Greeting message */
if (myrank == 0) greetings();
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ensure the NUMA node on which we initialise (first touch) everything
* doesn't change before engine_init allocates NUMA-local workers. Otherwise,
* we may be scheduled elsewhere between the two times.
*/
cpu_set_t affinity;
CPU_ZERO(&affinity);
CPU_SET(sched_getcpu(), &affinity);
if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
message("failed to set entry thread's affinity");
} else {
message("set entry thread's affinity");
}
#endif
/* Init the space. */
bzero(&s, sizeof(struct space));
......
......@@ -28,6 +28,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
/* MPI headers. */
#ifdef WITH_MPI
......@@ -38,6 +39,10 @@
#endif
#endif
#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif
/* This object's header. */
#include "engine.h"
......@@ -2117,6 +2122,22 @@ void engine_split(struct engine *e, int *grid) {
s->xparts = xparts_new;
}
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/**
 * @brief Guess whether hyperthreading is enabled on this machine.
 *
 * On Linux, reads CPU 0's thread_siblings_list from sysfs and scans it for a
 * comma; finding one is taken to mean that CPU 0 shares its core with at
 * least one other hardware thread. On other platforms there is no check and
 * the function simply guesses.
 *
 * @return true if a comma was found in the sibling list (Linux) or
 *         unconditionally on non-Linux platforms; false if no comma was
 *         found or the sysfs file could not be opened.
 */
static bool hyperthreads_present(void) {
#ifdef __linux__
  FILE *f =
      fopen("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list", "r");

  /* The topology file may be missing or unreadable; without this check
   * fgetc()/fclose() would be handed a NULL stream. Report "no hyperthreads"
   * so callers do not reorder CPUs based on information we could not read. */
  if (f == NULL) return false;

  /* Consume characters until the first comma or the end of the file. */
  int c;
  while ((c = fgetc(f)) != EOF && c != ',') {
  }
  fclose(f);

  return c == ',';
#else
  return true; /* just guess */
#endif
}
#endif
/**
* @brief init an engine with the given number of threads, queues, and
* the given policy.
......@@ -2153,6 +2174,42 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
for (j = maxint / i / 2; j < maxint; j += maxint / i)
if (j < nr_cores && j != 0) cpuid[k++] = j;
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ascending NUMA distance. Bubblesort(!) for stable equidistant CPUs. */
if (numa_available() >= 0) {
if (nodeID == 0) message("prefer NUMA-local CPUs");
int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2;
bool done = false, swap_hyperthreads = hyperthreads_present();
if (swap_hyperthreads) message("prefer physical cores to hyperthreads");
while (!done) {
done = true;
for (i = 1; i < nr_cores; i++) {
int node_a = numa_node_of_cpu(cpuid[i-1]);
int node_b = numa_node_of_cpu(cpuid[i]);
/* Avoid using local hyperthreads over unused remote physical cores.
* Assume two hyperthreads, and that cpuid >= half partitions them.
*/
int thread_a = swap_hyperthreads && cpuid[i-1] >= half;
int thread_b = swap_hyperthreads && cpuid[i] >= half;
bool swap = thread_a > thread_b;
if (thread_a == thread_b)
swap = numa_distance(home, node_a) > numa_distance(home, node_b);
if (swap) {
int t = cpuid[i-1];
cpuid[i-1] = cpuid[i];
cpuid[i] = t;
done = false;
}
}
}
}
#endif
if (nodeID == 0) {
#ifdef WITH_MPI
message("engine_init: cpu map is [ ");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment