Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
c826e1a1
Commit
c826e1a1
authored
Feb 28, 2013
by
Pedro Gonnet
Browse files
more efficient distribution of work in engine_prepare.
Former-commit-id: 1d78f54c1bc07c1f0d81e0451509058f106b6c08
parent
58d81f85
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/engine.c
View file @
c826e1a1
...
...
@@ -72,7 +72,7 @@ void engine_prepare ( struct engine *e ) {
/* Rebuild the space. */
// tic = getticks();
space_prepare
(
e
->
s
);
// printf( "engine_prepare: space_prepare
with %i changes
took %.3f ms.\n" ,
changes ,
(double)(getticks() - tic) / CPU_TPS * 1000 );
// printf( "engine_prepare: space_prepare took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
// tic = getticks();
/* Init the queues (round-robin). */
...
...
@@ -92,7 +92,7 @@ void engine_prepare ( struct engine *e ) {
/* Re-set the particle data. */
// tic = getticks();
#pragma omp parallel for schedule(static) private(j)
#pragma omp parallel for schedule(static) private(j)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
if
(
s
->
parts
[
k
].
dt
<=
dt_step
)
{
s
->
parts
[
k
].
wcount
=
0
.
0
f
;
...
...
@@ -170,6 +170,127 @@ void engine_barrier( struct engine *e ) {
}
/**
* @brief Mapping function to set dt_min and dt_max, do the first
* kick.
*/
void
engine_map_kick_first
(
struct
cell
*
c
,
void
*
data
)
{
int
j
,
k
;
struct
engine
*
e
=
(
struct
engine
*
)
data
;
float
dt_step
=
e
->
dt_step
,
dt
=
e
->
dt
,
hdt
=
0
.
5
f
*
dt
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
;
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
struct
cpart
*
restrict
cp
;
/* No children? */
if
(
!
c
->
split
)
{
/* Init the min/max counters. */
dt_min
=
FLT_MAX
;
dt_max
=
0
.
0
f
;
h_max
=
0
.
0
f
;
dx_max
=
0
.
0
f
;
/* Loop over parts. */
for
(
k
=
0
;
k
<
c
->
count
;
k
++
)
{
/* Get a handle on the kth particle. */
p
=
&
c
->
parts
[
k
];
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
k
];
/* Store the min/max dt. */
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
dt_max
=
fmaxf
(
dt_max
,
p
->
dt
);
/* Step and store the velocity and internal energy. */
xp
->
v_old
[
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
xp
->
v_old
[
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
xp
->
v_old
[
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
xp
->
u_old
=
p
->
u
+
hdt
*
p
->
force
.
u_dt
;
/* Move the particles with the velocitie at the half-step. */
p
->
x
[
0
]
+=
dt
*
xp
->
v_old
[
0
];
p
->
x
[
1
]
+=
dt
*
xp
->
v_old
[
1
];
p
->
x
[
2
]
+=
dt
*
xp
->
v_old
[
2
];
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
);
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*=
expf
(
p
->
force
.
u_dt
/
p
->
u
*
dt
);
p
->
h
*=
expf
(
p
->
force
.
h_dt
/
p
->
h
*
dt
);
h_max
=
fmaxf
(
h_max
,
p
->
h
);
/* Integrate other values if this particle will not be updated. */
if
(
p
->
dt
>
dt_step
)
{
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
force
.
h_dt
/
p
->
h
*
dt
);
p
->
force
.
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
/* Fill the cpart. */
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
/* Init fields for density calculation. */
if
(
p
->
dt
<=
dt_step
)
{
p
->
wcount
=
0
.
0
f
;
p
->
density
.
wcount_dh
=
0
.
0
f
;
p
->
rho
=
0
.
0
f
;
p
->
rho_dh
=
0
.
0
f
;
p
->
density
.
div_v
=
0
.
0
f
;
for
(
j
=
0
;
j
<
3
;
++
j
)
p
->
density
.
curl_v
[
j
]
=
0
.
0
f
;
}
}
}
/* Otherwise, agregate data from children. */
else
{
/* Init with the first non-null child. */
for
(
k
=
0
;
c
->
progeny
[
k
]
==
0
;
k
++
);
dt_min
=
c
->
progeny
[
k
]
->
dt_min
;
dt_max
=
c
->
progeny
[
k
]
->
dt_max
;
h_max
=
c
->
progeny
[
k
]
->
h_max
;
dx_max
=
c
->
progeny
[
k
]
->
dx_max
;
/* Loop over the remaining progeny. */
for
(
k
+=
1
;
k
<
8
;
k
++
)
if
(
c
->
progeny
[
k
]
!=
NULL
)
{
dt_min
=
fminf
(
dt_min
,
c
->
progeny
[
k
]
->
dt_min
);
dt_max
=
fmaxf
(
dt_max
,
c
->
progeny
[
k
]
->
dt_max
);
h_max
=
fmaxf
(
h_max
,
c
->
progeny
[
k
]
->
h_max
);
dx_max
=
fmaxf
(
dx_max
,
c
->
progeny
[
k
]
->
dx_max
);
}
}
/* Store the values. */
c
->
dt_min
=
dt_min
;
c
->
dt_max
=
dt_max
;
c
->
h_max
=
h_max
;
c
->
dx_max
=
dx_max
;
/* Clean out the task pointers. */
c
->
sorts
[
0
]
=
NULL
;
c
->
nr_tasks
=
0
;
c
->
nr_density
=
0
;
}
/**
* @brief Let the #engine loose to compute the forces.
*
...
...
@@ -201,38 +322,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* First kick. */
TIMER_TIC
#pragma omp parallel for schedule(static) private(p,xp)
for
(
k
=
0
;
k
<
nr_parts
;
k
++
)
{
/* Get a handle on the part. */
p
=
&
parts
[
k
];
xp
=
p
->
xtras
;
/* Step and store the velocity and internal energy. */
xp
->
v_old
[
0
]
=
p
->
v
[
0
]
+
hdt
*
p
->
a
[
0
];
xp
->
v_old
[
1
]
=
p
->
v
[
1
]
+
hdt
*
p
->
a
[
1
];
xp
->
v_old
[
2
]
=
p
->
v
[
2
]
+
hdt
*
p
->
a
[
2
];
xp
->
u_old
=
p
->
u
+
hdt
*
p
->
force
.
u_dt
;
/* Move the particles with the velocitie at the half-step. */
p
->
x
[
0
]
+=
dt
*
xp
->
v_old
[
0
];
p
->
x
[
1
]
+=
dt
*
xp
->
v_old
[
1
];
p
->
x
[
2
]
+=
dt
*
xp
->
v_old
[
2
];
/* Update positions and energies at the half-step. */
p
->
v
[
0
]
+=
dt
*
p
->
a
[
0
];
p
->
v
[
1
]
+=
dt
*
p
->
a
[
1
];
p
->
v
[
2
]
+=
dt
*
p
->
a
[
2
];
p
->
u
*=
expf
(
p
->
force
.
u_dt
/
p
->
u
*
dt
);
p
->
h
*=
expf
(
p
->
force
.
h_dt
/
p
->
h
*
dt
);
/* Integrate other values if this particle will not be updated. */
if
(
p
->
dt
>
dt_step
)
{
p
->
rho
*=
expf
(
-
3
.
0
f
*
p
->
force
.
h_dt
/
p
->
h
*
dt
);
p
->
force
.
POrho2
=
p
->
u
*
(
const_gamma
-
1
.
0
f
)
/
(
p
->
rho
+
p
->
h
*
p
->
rho_dh
/
3
.
0
f
);
}
}
space_map_cells_post
(
e
->
s
,
1
,
&
engine_map_kick_first
,
e
);
TIMER_TOC
(
timer_kick1
);
// for(k=0; k<10; ++k)
...
...
src/space.c
View file @
c826e1a1
...
...
@@ -85,8 +85,9 @@ void space_map_prepare ( struct cell *c , void *data ) {
int
k
;
float
dt_min
,
dt_max
,
h_max
,
dx_max
;
struct
part
*
p
;
struct
xpart
*
xp
;
struct
part
*
restrict
p
;
struct
xpart
*
restrict
xp
;
struct
cpart
*
restrict
cp
;
/* No children? */
if
(
!
c
->
split
)
{
...
...
@@ -94,23 +95,36 @@ void space_map_prepare ( struct cell *c , void *data ) {
/* Init with first part. */
p
=
&
c
->
parts
[
0
];
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
0
];
dt_min
=
p
->
dt
;
dt_max
=
p
->
dt
;
h_max
=
p
->
h
;
dx_max
=
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
;
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
/* Loop over parts. */
for
(
k
=
1
;
k
<
c
->
count
;
k
++
)
{
p
=
&
c
->
parts
[
k
];
xp
=
p
->
xtras
;
cp
=
&
c
->
cparts
[
0
];
dt_min
=
fminf
(
dt_min
,
p
->
dt
);
dt_max
=
fmaxf
(
dt_max
,
p
->
dt
);
h_max
=
fmaxf
(
h_max
,
p
->
h
);
dx_max
=
fmaxf
(
dx_max
,
sqrtf
(
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
*
(
p
->
x
[
0
]
-
xp
->
x_old
[
0
])
+
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
*
(
p
->
x
[
1
]
-
xp
->
x_old
[
1
])
+
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
*
(
p
->
x
[
2
]
-
xp
->
x_old
[
2
])
)
*
2
+
p
->
h
);
cp
->
x
[
0
]
=
p
->
x
[
0
];
cp
->
x
[
1
]
=
p
->
x
[
1
];
cp
->
x
[
2
]
=
p
->
x
[
2
];
cp
->
h
=
p
->
h
;
cp
->
dt
=
p
->
dt
;
}
}
...
...
@@ -166,9 +180,12 @@ void space_prepare ( struct space *s ) {
struct
task
*
t
;
float
dt_step
=
s
->
dt_step
,
dx_max
=
0
.
0
f
;
int
counts
[
task_type_count
+
1
];
ticks
tic
;
/* Traverse the cells and set their dt_min and dx_max. */
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
// tic = getticks();
// space_map_cells_post( s , 1 , &space_map_prepare , NULL );
// printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Get the maximum displacement in the whole system. */
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
...
...
@@ -176,6 +193,7 @@ void space_prepare ( struct space *s ) {
printf
(
"space_prepare: dx_max is %e.
\n
"
,
dx_max
);
/* Run through the tasks and mark as skip or not. */
tic
=
getticks
();
for
(
k
=
0
;
k
<
s
->
nr_tasks
;
k
++
)
{
t
=
&
s
->
tasks
[
k
];
if
(
t
->
type
==
task_type_sort
||
...
...
@@ -190,34 +208,30 @@ void space_prepare ( struct space *s ) {
break
;
}
}
printf
(
"space_prepare: checking tasks took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Did this not go through? */
if
(
k
<
s
->
nr_tasks
)
{
/* Re-build the space. */
tic
=
getticks
();
space_rebuild
(
s
,
0
.
0
);
printf
(
"space_prepare: space_rebuild took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Traverse the cells and set their dt_min and dx_max. */
tic
=
getticks
();
space_map_cells_post
(
s
,
1
,
&
space_map_prepare
,
NULL
);
printf
(
"space_prepare: space_map_prepare took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
}
/* Store the condensed particle data. */
#pragma omp parallel for schedule(static)
for
(
k
=
0
;
k
<
s
->
nr_parts
;
k
++
)
{
s
->
cparts
[
k
].
x
[
0
]
=
s
->
parts
[
k
].
x
[
0
];
s
->
cparts
[
k
].
x
[
1
]
=
s
->
parts
[
k
].
x
[
1
];
s
->
cparts
[
k
].
x
[
2
]
=
s
->
parts
[
k
].
x
[
2
];
s
->
cparts
[
k
].
h
=
s
->
parts
[
k
].
h
;
s
->
cparts
[
k
].
dt
=
s
->
parts
[
k
].
dt
;
}
/* Now that we have the cell structre, re-build the tasks. */
//
tic = getticks();
tic
=
getticks
();
space_maketasks
(
s
,
1
);
//
printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf
(
"space_prepare: maketasks took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
/* Count the number of each task type. */
tic
=
getticks
();
for
(
k
=
0
;
k
<=
task_type_count
;
k
++
)
counts
[
k
]
=
0
;
for
(
k
=
0
;
k
<
s
->
nr_tasks
;
k
++
)
...
...
@@ -229,6 +243,7 @@ void space_prepare ( struct space *s ) {
for
(
k
=
1
;
k
<
task_type_count
;
k
++
)
printf
(
" %s=%i"
,
taskID_names
[
k
]
,
counts
[
k
]
);
printf
(
" skipped=%i ]
\n
"
,
counts
[
task_type_count
]
);
fflush
(
stdout
);
printf
(
"space_prepare: task counting took %.3f ms.
\n
"
,
(
double
)(
getticks
()
-
tic
)
/
CPU_TPS
*
1000
);
}
...
...
@@ -762,6 +777,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for
(
i
=
0
;
i
<
s
->
nr_cells
;
i
++
)
rec_map
(
&
s
->
cells
[
i
]
);
...
...
@@ -798,6 +814,7 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for
(
i
=
0
;
i
<
s
->
nr_cells
;
i
++
)
rec_map
(
&
s
->
cells
[
i
]
);
...
...
@@ -825,6 +842,7 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for
(
i
=
0
;
i
<
s
->
nr_cells
;
i
++
)
rec_map
(
&
s
->
cells
[
i
]
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment