Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
39c9a233
Commit
39c9a233
authored
Jan 22, 2016
by
Matthieu Schaller
Browse files
Merge branch 'master' of gitlab.cosma.dur.ac.uk:swift/swiftsim into pasc_paper
parents
45fa0e48
b46be395
Changes
5
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
39c9a233
...
...
@@ -270,6 +270,9 @@ AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[true],
AM_CONDITIONAL(HAVESETAFFINITY,
[test "$ac_cv_func_pthread_setaffinity_np" = "yes"])
# Check for libnuma.
AC_CHECK_LIB([numa], [numa_available])
# Check for timing functions needed by cycle.h.
AC_HEADER_TIME
AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
...
...
examples/main.c
View file @
39c9a233
...
...
@@ -116,6 +116,21 @@ int main(int argc, char *argv[]) {
/* Greeting message */
if
(
myrank
==
0
)
greetings
();
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ensure the NUMA node on which we initialise (first touch) everything
* doesn't change before engine_init allocates NUMA-local workers. Otherwise,
* we may be scheduled elsewhere between the two times.
*/
cpu_set_t
affinity
;
CPU_ZERO
(
&
affinity
);
CPU_SET
(
sched_getcpu
(),
&
affinity
);
if
(
sched_setaffinity
(
0
,
sizeof
(
cpu_set_t
),
&
affinity
)
!=
0
)
{
message
(
"failed to set entry thread's affinity"
);
}
else
{
message
(
"set entry thread's affinity"
);
}
#endif
/* Init the space. */
bzero
(
&
s
,
sizeof
(
struct
space
));
...
...
src/engine.c
View file @
39c9a233
...
...
@@ -28,6 +28,7 @@
#include
<stdlib.h>
#include
<string.h>
#include
<unistd.h>
#include
<stdbool.h>
/* MPI headers. */
#ifdef WITH_MPI
...
...
@@ -38,6 +39,10 @@
#endif
#endif
#ifdef HAVE_LIBNUMA
#include
<numa.h>
#endif
/* This object's header. */
#include
"engine.h"
...
...
@@ -2117,6 +2122,22 @@ void engine_split(struct engine *e, int *grid) {
s
->
xparts
=
xparts_new
;
}
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
static
bool
hyperthreads_present
(
void
)
{
#ifdef __linux__
FILE
*
f
=
fopen
(
"/sys/devices/system/cpu/cpu0/topology/thread_siblings_list"
,
"r"
);
int
c
;
while
((
c
=
fgetc
(
f
))
!=
EOF
&&
c
!=
','
);
fclose
(
f
);
return
c
==
','
;
#else
return
true
;
// just guess
#endif
}
#endif
/**
* @brief init an engine with the given number of threads, queues, and
* the given policy.
...
...
@@ -2153,6 +2174,42 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
for
(
j
=
maxint
/
i
/
2
;
j
<
maxint
;
j
+=
maxint
/
i
)
if
(
j
<
nr_cores
&&
j
!=
0
)
cpuid
[
k
++
]
=
j
;
#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
/* Ascending NUMA distance. Bubblesort(!) for stable equidistant CPUs. */
if
(
numa_available
()
>=
0
)
{
if
(
nodeID
==
0
)
message
(
"prefer NUMA-local CPUs"
);
int
home
=
numa_node_of_cpu
(
sched_getcpu
()),
half
=
nr_cores
/
2
;
bool
done
=
false
,
swap_hyperthreads
=
hyperthreads_present
();
if
(
swap_hyperthreads
)
message
(
"prefer physical cores to hyperthreads"
);
while
(
!
done
)
{
done
=
true
;
for
(
i
=
1
;
i
<
nr_cores
;
i
++
)
{
int
node_a
=
numa_node_of_cpu
(
cpuid
[
i
-
1
]);
int
node_b
=
numa_node_of_cpu
(
cpuid
[
i
]);
/* Avoid using local hyperthreads over unused remote physical cores.
* Assume two hyperthreads, and that cpuid >= half partitions them.
*/
int
thread_a
=
swap_hyperthreads
&&
cpuid
[
i
-
1
]
>=
half
;
int
thread_b
=
swap_hyperthreads
&&
cpuid
[
i
]
>=
half
;
bool
swap
=
thread_a
>
thread_b
;
if
(
thread_a
==
thread_b
)
swap
=
numa_distance
(
home
,
node_a
)
>
numa_distance
(
home
,
node_b
);
if
(
swap
)
{
int
t
=
cpuid
[
i
-
1
];
cpuid
[
i
-
1
]
=
cpuid
[
i
];
cpuid
[
i
]
=
t
;
done
=
false
;
}
}
}
}
#endif
if
(
nodeID
==
0
)
{
#ifdef WITH_MPI
message
(
"engine_init: cpu map is [ "
);
...
...
src/kernel.h
View file @
39c9a233
...
...
@@ -516,16 +516,17 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float x,
#define kernel_name "Wendland C2"
#define kernel_degree 5
#define kernel_ivals 1
#define kernel_gamma
1
.f
#define kernel_gamma2
1
.f
#define kernel_gamma3
1
.f
#define kernel_igamma
1.
f
#define kernel_gamma
2
.f
#define kernel_gamma2
4
.f
#define kernel_gamma3
8
.f
#define kernel_igamma
0.5
f
#define kernel_nwneigh \
(4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \
7.261825f)
static
float
kernel_coeffs
[(
kernel_degree
+
1
)
*
(
kernel_ivals
+
1
)]
__attribute__
((
aligned
(
16
)))
=
{
4
.
0
f
,
-
15
.
0
f
,
20
.
0
f
,
-
10
.
0
f
,
0
.
0
f
,
1
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
__attribute__
((
aligned
(
16
)))
=
{
0
.
05222272
f
,
-
0
.
39167037
f
,
1
.
04445431
f
,
-
1
.
04445431
f
,
0
.
f
,
0
.
41778173
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
#define kernel_root (kernel_coeffs[kernel_degree])
#define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree])
...
...
@@ -537,7 +538,7 @@ static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)]
__attribute__
((
always_inline
))
INLINE
static
void
kernel_deval
(
float
x
,
float
*
W
,
float
*
dW_dx
)
{
int
ind
=
fminf
(
x
,
kernel_ivals
);
int
ind
=
fminf
(
0
.
5
f
*
x
,
kernel_ivals
);
float
*
coeffs
=
&
kernel_coeffs
[
ind
*
(
kernel_degree
+
1
)];
float
w
=
coeffs
[
0
]
*
x
+
coeffs
[
1
];
float
dw_dx
=
coeffs
[
0
];
...
...
@@ -563,7 +564,7 @@ __attribute__((always_inline))
int
j
,
k
;
/* Load x and get the interval id. */
ind
.
m
=
vec_ftoi
(
vec_fmin
(
x
->
v
,
vec_set1
((
float
)
kernel_ivals
)));
ind
.
m
=
vec_ftoi
(
vec_fmin
(
0
.
5
f
*
x
->
v
,
vec_set1
((
float
)
kernel_ivals
)));
/* load the coefficients. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
...
...
@@ -590,7 +591,7 @@ __attribute__((always_inline))
__attribute__
((
always_inline
))
INLINE
static
void
kernel_eval
(
float
x
,
float
*
W
)
{
int
ind
=
fmin
(
x
,
kernel_ivals
);
int
ind
=
fmin
(
0
.
5
f
*
x
,
kernel_ivals
);
float
*
coeffs
=
&
kernel_coeffs
[
ind
*
(
kernel_degree
+
1
)];
float
w
=
coeffs
[
0
]
*
x
+
coeffs
[
1
];
for
(
int
k
=
2
;
k
<=
kernel_degree
;
k
++
)
w
=
x
*
w
+
coeffs
[
k
];
...
...
theory/kernel/kernels.py
0 → 100644
View file @
39c9a233
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import
matplotlib
matplotlib
.
use
(
"Agg"
)
from
pylab
import
*
from
scipy
import
integrate
import
distinct_colours
as
colours
from
scipy.optimize
import
curve_fit
from
scipy.optimize
import
fsolve
from
matplotlib.font_manager
import
FontProperties
import
numpy
params
=
{
'axes.labelsize'
:
8
,
'axes.titlesize'
:
8
,
'font.size'
:
8
,
'legend.fontsize'
:
9
,
'xtick.labelsize'
:
8
,
'ytick.labelsize'
:
8
,
'xtick.major.pad'
:
2.5
,
'ytick.major.pad'
:
2.5
,
'text.usetex'
:
True
,
'figure.figsize'
:
(
3.15
,
3.15
),
'figure.subplot.left'
:
0.12
,
'figure.subplot.right'
:
0.99
,
'figure.subplot.bottom'
:
0.08
,
'figure.subplot.top'
:
0.99
,
'figure.subplot.wspace'
:
0.
,
'figure.subplot.hspace'
:
0.
,
'lines.markersize'
:
6
,
'lines.linewidth'
:
2.
,
'text.latex.unicode'
:
True
}
rcParams
.
update
(
params
)
rc
(
'font'
,
**
{
'family'
:
'sans-serif'
,
'sans-serif'
:[
'Times'
]})
#Parameters
eta
=
1.2349
h
=
2.1
#Constants
PI
=
math
.
pi
#Cubic Spline
cubic_kernel_degree
=
3
cubic_kernel_ivals
=
2
cubic_kernel_gamma
=
2.
cubic_kernel_ngb
=
4.0
/
3.0
*
PI
*
eta
**
3
*
6.0858
cubic_kernel_coeffs
=
array
([[
3.
/
(
4.
*
PI
)
,
-
3.
/
(
2.
*
PI
),
0.
,
1.
/
PI
],
[
-
1.
/
(
4.
*
PI
),
3.
/
(
2.
*
PI
),
-
3.
/
PI
,
2.
/
PI
],
[
0.
,
0.
,
0.
,
0.
]])
def
cubic_W
(
x
):
if
size
(
x
)
==
1
:
x
=
array
([
x
])
ind
=
(
minimum
(
x
,
cubic_kernel_ivals
)).
astype
(
int
)
coeffs
=
cubic_kernel_coeffs
[
ind
,:]
w
=
coeffs
[:,
0
]
*
x
+
coeffs
[:,
1
]
for
k
in
range
(
2
,
cubic_kernel_degree
+
1
):
w
=
x
*
w
+
coeffs
[:,
k
]
return
w
#Quartic Spline
quartic_kernel_degree
=
4
quartic_kernel_ivals
=
3
quartic_kernel_gamma
=
2.5
quartic_kernel_ngb
=
4.0
/
3.0
*
PI
*
eta
**
3
*
8.2293
quartic_kernel_coeffs
=
array
([[
3.
/
(
10.
*
PI
)
,
0.
,
-
3.
/
(
4.
*
PI
)
,
0.
,
23.
/
(
32.
*
PI
)],
[
-
1.
/
(
5.
*
PI
)
,
1.
/
PI
,
-
3.
/
(
2.
*
PI
)
,
1.
/
(
4.
*
PI
)
,
11.
/
(
16.
*
PI
)],
[
1.
/
(
20.
*
PI
)
,
-
1.
/
(
2.
*
PI
)
,
15.
/
(
8.
*
PI
)
,
-
25.
/
(
8.
*
PI
),
125.
/
(
64.
*
PI
)],
[
0.
,
0.
,
0.
,
0.
,
0.
]])
def
quartic_W
(
x
):
if
size
(
x
)
==
1
:
x
=
array
([
x
])
ind
=
(
minimum
(
x
+
0.5
,
quartic_kernel_ivals
)).
astype
(
int
)
coeffs
=
quartic_kernel_coeffs
[
ind
,:]
w
=
coeffs
[:,
0
]
*
x
+
coeffs
[:,
1
]
for
k
in
range
(
2
,
quartic_kernel_degree
+
1
):
w
=
x
*
w
+
coeffs
[:,
k
]
return
w
# Wendland kernel
wendland2_kernel_degree
=
5
wendland2_kernel_ivals
=
1
wendland2_kernel_gamma
=
2
wendland2_kernel_ngb
=
4.0
/
3.0
*
PI
*
eta
**
3
*
7.261825
wendland2_kernel_coeffs
=
3.342253804929802
*
array
([[
1.
/
8
,
-
30.
/
32
,
80.
/
32
,
-
80.
/
32.
,
0.
,
1.
],
[
0.
,
0.
,
0.
,
0.
,
0.
,
0.
]])
/
8.
print
wendland2_kernel_coeffs
def
wendland2_W
(
x
):
if
size
(
x
)
==
1
:
x
=
array
([
x
])
ind
=
(
minimum
(
0.5
*
x
,
wendland2_kernel_ivals
)).
astype
(
int
)
coeffs
=
wendland2_kernel_coeffs
[
ind
,:]
w
=
coeffs
[:,
0
]
*
x
+
coeffs
[:,
1
]
for
k
in
range
(
2
,
wendland2_kernel_degree
+
1
):
w
=
x
*
w
+
coeffs
[:,
k
]
return
w
#def wendland2_W(x):
# if size(x) == 1:
# x = array([x])
# x /= 1.936492
# x[x>1.] = 1.
# oneminusu = 1.-x
# oneminusu4 = oneminusu * oneminusu * oneminusu * oneminusu
# return 3.342253804929802 * oneminusu4 * (1. + 4.*x) / h**3
#Find H
r
=
arange
(
0
,
3.5
*
h
,
1.
/
1000.
)
xi
=
r
/
h
cubic_Ws
=
cubic_W
(
xi
)
quartic_Ws
=
quartic_W
(
xi
)
wendland2_Ws
=
wendland2_W
(
xi
)
for
j
in
range
(
size
(
r
)):
if
cubic_Ws
[
j
]
==
0
:
cubic_H
=
r
[
j
]
break
for
j
in
range
(
size
(
r
)):
if
quartic_Ws
[
j
]
==
0
:
quartic_H
=
r
[
j
]
break
for
j
in
range
(
size
(
r
)):
if
wendland2_Ws
[
j
]
==
0
:
wendland2_H
=
r
[
j
]
break
print
"H="
,
cubic_H
print
"H="
,
quartic_H
print
"H="
,
wendland2_H
# Compute sigma -----------------------------------------
cubic_norm
=
4.
*
PI
*
integrate
.
quad
(
lambda
x
:
x
**
2
*
cubic_W
(
x
),
0
,
cubic_H
)[
0
]
quartic_norm
=
4.
*
PI
*
integrate
.
quad
(
lambda
x
:
x
**
2
*
quartic_W
(
x
),
0
,
quartic_H
)[
0
]
wendland2_norm
=
4.
*
PI
*
integrate
.
quad
(
lambda
x
:
x
**
2
*
wendland2_W
(
x
),
0
,
wendland2_H
)[
0
]
print
cubic_norm
print
quartic_norm
print
wendland2_norm
# Plot kernels ------------------------------------------
r
=
arange
(
0
,
3.5
*
h
,
1.
/
100.
)
xi
=
r
/
h
cubic_Ws
=
cubic_W
(
xi
)
quartic_Ws
=
quartic_W
(
xi
)
wendland2_Ws
=
wendland2_W
(
xi
)
figure
()
text
(
h
-
0.1
,
cubic_Ws
[
0
]
/
20.
,
"h"
,
ha
=
"right"
,
va
=
"center"
)
arrow
(
h
,
cubic_Ws
[
0
]
/
10.
,
0.
,
-
cubic_Ws
[
0
]
/
10.
,
fc
=
'k'
,
ec
=
'k'
,
length_includes_head
=
True
,
head_length
=
cubic_Ws
[
0
]
/
30.
,
head_width
=
0.1
)
plot
(
r
,
cubic_Ws
,
'b-'
,
label
=
"Cubic"
)
plot
(
r
,
quartic_Ws
,
'r-'
,
label
=
"Quartic"
)
plot
(
r
,
wendland2_Ws
,
'g-'
,
label
=
"Wendland C2"
)
text
(
cubic_H
-
0.1
,
cubic_Ws
[
0
]
/
20.
,
"H"
,
ha
=
"right"
,
va
=
"center"
,
color
=
'b'
)
arrow
(
cubic_H
,
cubic_Ws
[
0
]
/
10.
,
0.
,
-
cubic_Ws
[
0
]
/
10.
,
fc
=
'b'
,
ec
=
'b'
,
length_includes_head
=
True
,
head_length
=
cubic_Ws
[
0
]
/
30.
,
head_width
=
0.1
)
text
(
quartic_H
-
0.1
,
cubic_Ws
[
0
]
/
20.
,
"H"
,
ha
=
"right"
,
va
=
"center"
,
color
=
'r'
)
arrow
(
quartic_H
,
cubic_Ws
[
0
]
/
10.
,
0.
,
-
cubic_Ws
[
0
]
/
10.
,
fc
=
'r'
,
ec
=
'r'
,
length_includes_head
=
True
,
head_length
=
quartic_Ws
[
0
]
/
30.
,
head_width
=
0.1
)
text
(
wendland2_H
-
0.1
,
cubic_Ws
[
0
]
/
20.
,
"H"
,
ha
=
"right"
,
va
=
"center"
,
color
=
'r'
)
arrow
(
wendland2_H
,
cubic_Ws
[
0
]
/
10.
,
0.
,
-
cubic_Ws
[
0
]
/
10.
,
fc
=
'g'
,
ec
=
'g'
,
length_includes_head
=
True
,
head_length
=
wendland2_Ws
[
0
]
/
30.
,
head_width
=
0.1
)
xlabel
(
"r"
,
labelpad
=
0
)
ylabel
(
"W(r,h)"
,
labelpad
=
0
)
legend
(
loc
=
"upper right"
)
savefig
(
"kernel.pdf"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment