Commit 39c9a233 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'master' of gitlab.cosma.dur.ac.uk:swift/swiftsim into pasc_paper

parents 45fa0e48 b46be395
......@@ -270,6 +270,9 @@ AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[true],
AM_CONDITIONAL(HAVESETAFFINITY,
[test "$ac_cv_func_pthread_setaffinity_np" = "yes"])
# Check for libnuma.
AC_CHECK_LIB([numa], [numa_available])
# Check for timing functions needed by cycle.h.
AC_HEADER_TIME
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
......
......@@ -116,6 +116,21 @@ int main(int argc, char *argv[]) {
/* Greeting message */
if (myrank == 0) greetings();
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ensure the NUMA node on which we initialise (first touch) everything
* doesn't change before engine_init allocates NUMA-local workers. Otherwise,
* we may be scheduled elsewhere between the two times.
*/
cpu_set_t affinity;
CPU_ZERO(&affinity);
CPU_SET(sched_getcpu(), &affinity);
if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) {
message("failed to set entry thread's affinity");
} else {
message("set entry thread's affinity");
}
#endif
/* Init the space. */
bzero(&s, sizeof(struct space));
......
......@@ -28,6 +28,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
/* MPI headers. */
#ifdef WITH_MPI
......@@ -38,6 +39,10 @@
#endif
#endif
#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif
/* This object's header. */
#include "engine.h"
......@@ -2117,6 +2122,22 @@ void engine_split(struct engine *e, int *grid) {
s->xparts = xparts_new;
}
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
static bool hyperthreads_present(void) {
#ifdef __linux__
FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list", "r");
int c;
while ((c = fgetc(f)) != EOF && c != ',');
fclose(f);
return c == ',';
#else
return true; // just guess
#endif
}
#endif
/**
* @brief init an engine with the given number of threads, queues, and
* the given policy.
......@@ -2153,6 +2174,42 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
for (j = maxint / i / 2; j < maxint; j += maxint / i)
if (j < nr_cores && j != 0) cpuid[k++] = j;
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ascending NUMA distance. Bubblesort(!) for stable equidistant CPUs. */
if (numa_available() >= 0) {
if (nodeID == 0) message("prefer NUMA-local CPUs");
int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2;
bool done = false, swap_hyperthreads = hyperthreads_present();
if (swap_hyperthreads) message("prefer physical cores to hyperthreads");
while (!done) {
done = true;
for (i = 1; i < nr_cores; i++) {
int node_a = numa_node_of_cpu(cpuid[i-1]);
int node_b = numa_node_of_cpu(cpuid[i]);
/* Avoid using local hyperthreads over unused remote physical cores.
* Assume two hyperthreads, and that cpuid >= half partitions them.
*/
int thread_a = swap_hyperthreads && cpuid[i-1] >= half;
int thread_b = swap_hyperthreads && cpuid[i] >= half;
bool swap = thread_a > thread_b;
if (thread_a == thread_b)
swap = numa_distance(home, node_a) > numa_distance(home, node_b);
if (swap) {
int t = cpuid[i-1];
cpuid[i-1] = cpuid[i];
cpuid[i] = t;
done = false;
}
}
}
}
#endif
if (nodeID == 0) {
#ifdef WITH_MPI
message("engine_init: cpu map is [ ");
......
......@@ -516,15 +516,16 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float x,
#define kernel_name "Wendland C2"
#define kernel_degree 5
#define kernel_ivals 1
#define kernel_gamma 1.f
#define kernel_gamma2 1.f
#define kernel_gamma3 1.f
#define kernel_igamma 1.f
#define kernel_gamma 2.f
#define kernel_gamma2 4.f
#define kernel_gamma3 8.f
#define kernel_igamma 0.5f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
7.261825f)
static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((aligned(16))) = {4.0f, -15.0f, 20.0f, -10.0f, 0.0f, 1.0f,
__attribute__((aligned(16))) = {
0.05222272f, -0.39167037f, 1.04445431f, -1.04445431f, 0.f, 0.41778173f,
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
......@@ -537,7 +538,7 @@ static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__((always_inline)) INLINE static void kernel_deval(float x,
float *W,
float *dW_dx) {
int ind = fminf(x, kernel_ivals);
int ind = fminf(0.5f * x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
float dw_dx = coeffs[0];
......@@ -563,7 +564,7 @@ __attribute__((always_inline))
int j, k;
/* Load x and get the interval id. */
ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals)));
ind.m = vec_ftoi(vec_fmin(0.5f * x->v, vec_set1((float)kernel_ivals)));
/* load the coefficients. */
for (k = 0; k < VEC_SIZE; k++)
......@@ -590,7 +591,7 @@ __attribute__((always_inline))
__attribute__((always_inline)) INLINE static void kernel_eval(float x,
float *W) {
int ind = fmin(x, kernel_ivals);
int ind = fmin(0.5f * x, kernel_ivals);
float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)];
float w = coeffs[0] * x + coeffs[1];
for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k];
......
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import matplotlib
matplotlib.use("Agg")
from pylab import *
from scipy import integrate
import distinct_colours as colours
from scipy.optimize import curve_fit
from scipy.optimize import fsolve
from matplotlib.font_manager import FontProperties
import numpy
params = {
'axes.labelsize': 8,
'axes.titlesize': 8,
'font.size': 8,
'legend.fontsize': 9,
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'xtick.major.pad': 2.5,
'ytick.major.pad': 2.5,
'text.usetex': True,
'figure.figsize' : (3.15,3.15),
'figure.subplot.left' : 0.12,
'figure.subplot.right' : 0.99 ,
'figure.subplot.bottom' : 0.08 ,
'figure.subplot.top' : 0.99 ,
'figure.subplot.wspace' : 0. ,
'figure.subplot.hspace' : 0. ,
'lines.markersize' : 6,
'lines.linewidth' : 2.,
'text.latex.unicode': True
}
rcParams.update(params)
rc('font',**{'family':'sans-serif','sans-serif':['Times']})
#Parameters
eta = 1.2349
h = 2.1
#Constants
PI = math.pi
#Cubic Spline
cubic_kernel_degree = 3
cubic_kernel_ivals = 2
cubic_kernel_gamma = 2.
cubic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 6.0858
cubic_kernel_coeffs = array([[3./(4.*PI) , -3./(2.*PI), 0., 1./PI],
[-1./(4.*PI), 3./(2.*PI), -3./PI, 2./PI],
[0., 0., 0., 0.]])
def cubic_W(x):
if size(x) == 1:
x = array([x])
ind = (minimum(x, cubic_kernel_ivals)).astype(int)
coeffs = cubic_kernel_coeffs[ind,:]
w = coeffs[:,0] * x + coeffs[:,1]
for k in range(2, cubic_kernel_degree+1):
w = x * w + coeffs[:,k]
return w
#Quartic Spline
quartic_kernel_degree = 4
quartic_kernel_ivals = 3
quartic_kernel_gamma = 2.5
quartic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 8.2293
quartic_kernel_coeffs = array([[3./(10.*PI) , 0., -3./(4.*PI) , 0. , 23./(32.*PI)],
[-1./(5.*PI) , 1./PI , -3./(2.*PI) ,1./(4.*PI) , 11./(16.*PI)],
[1./(20.*PI) , -1./(2.*PI) , 15./(8.*PI) , -25./(8.*PI), 125./(64.*PI)],
[ 0. , 0., 0., 0., 0.]])
def quartic_W(x):
if size(x) == 1:
x = array([x])
ind = (minimum(x+0.5, quartic_kernel_ivals)).astype(int)
coeffs = quartic_kernel_coeffs[ind,:]
w = coeffs[:,0] * x + coeffs[:,1]
for k in range(2, quartic_kernel_degree+1):
w = x * w + coeffs[:,k]
return w
# Wendland kernel
wendland2_kernel_degree = 5
wendland2_kernel_ivals = 1
wendland2_kernel_gamma = 2
wendland2_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 7.261825
wendland2_kernel_coeffs = 3.342253804929802 * array([[1./8, -30./32, 80./32, -80./32., 0., 1.],
[ 0. , 0., 0., 0., 0., 0.]]) / 8.
print wendland2_kernel_coeffs
def wendland2_W(x):
if size(x) == 1:
x = array([x])
ind = (minimum(0.5*x, wendland2_kernel_ivals)).astype(int)
coeffs = wendland2_kernel_coeffs[ind,:]
w = coeffs[:,0] * x + coeffs[:,1]
for k in range(2, wendland2_kernel_degree+1):
w = x * w + coeffs[:,k]
return w
#def wendland2_W(x):
# if size(x) == 1:
# x = array([x])
# x /= 1.936492
# x[x>1.] = 1.
# oneminusu = 1.-x
# oneminusu4 = oneminusu * oneminusu * oneminusu * oneminusu
# return 3.342253804929802 * oneminusu4 * (1. + 4.*x) / h**3
#Find H
r = arange(0, 3.5*h, 1./1000.)
xi = r/h
cubic_Ws = cubic_W(xi)
quartic_Ws = quartic_W(xi)
wendland2_Ws = wendland2_W(xi)
for j in range(size(r)):
if cubic_Ws[j] == 0:
cubic_H = r[j]
break
for j in range(size(r)):
if quartic_Ws[j] == 0:
quartic_H = r[j]
break
for j in range(size(r)):
if wendland2_Ws[j] == 0:
wendland2_H = r[j]
break
print "H=", cubic_H
print "H=", quartic_H
print "H=", wendland2_H
# Compute sigma -----------------------------------------
cubic_norm = 4.*PI*integrate.quad(lambda x: x**2*cubic_W(x), 0, cubic_H)[0]
quartic_norm = 4.*PI*integrate.quad(lambda x: x**2*quartic_W(x), 0, quartic_H)[0]
wendland2_norm = 4.*PI*integrate.quad(lambda x: x**2*wendland2_W(x), 0, wendland2_H)[0]
print cubic_norm
print quartic_norm
print wendland2_norm
# Plot kernels ------------------------------------------
r = arange(0, 3.5*h, 1./100.)
xi = r/h
cubic_Ws = cubic_W(xi)
quartic_Ws = quartic_W(xi)
wendland2_Ws = wendland2_W(xi)
figure()
text(h-0.1, cubic_Ws[0]/20., "h", ha="right",va="center")
arrow(h, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='k', ec='k', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1)
plot(r,cubic_Ws, 'b-' ,label="Cubic")
plot(r, quartic_Ws, 'r-', label="Quartic")
plot(r, wendland2_Ws, 'g-', label="Wendland C2")
text(cubic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='b')
arrow(cubic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='b', ec='b', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1)
text(quartic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r')
arrow(quartic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='r', ec='r', length_includes_head=True, head_length=quartic_Ws[0]/30., head_width=0.1)
text(wendland2_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r')
arrow(wendland2_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='g', ec='g', length_includes_head=True, head_length=wendland2_Ws[0]/30., head_width=0.1)
xlabel("r", labelpad=0)
ylabel("W(r,h)", labelpad=0)
legend(loc="upper right")
savefig("kernel.pdf")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment