space.c 54.4 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1
/*******************************************************************************
2
 * This file is part of SWIFT.
Pedro Gonnet's avatar
Pedro Gonnet committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
 * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <float.h>
#include <limits.h>
#include <math.h>

/* Local headers. */
#include "cycle.h"
34
#include "atomic.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
35
#include "lock.h"
36
#include "task.h"
37
#include "kernel.h"
38
39
#include "part.h"
#include "cell.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
40
41
#include "space.h"
#include "runner.h"
42
#include "error.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
43
44
45

/* Split size. */
int space_splitsize = space_splitsize_default;
46
int space_subsize = space_subsize_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
    /* ( -1 , -1 , -1 ) */   0 ,
    /* ( -1 , -1 ,  0 ) */   1 , 
    /* ( -1 , -1 ,  1 ) */   2 ,
    /* ( -1 ,  0 , -1 ) */   3 ,
    /* ( -1 ,  0 ,  0 ) */   4 , 
    /* ( -1 ,  0 ,  1 ) */   5 ,
    /* ( -1 ,  1 , -1 ) */   6 ,
    /* ( -1 ,  1 ,  0 ) */   7 , 
    /* ( -1 ,  1 ,  1 ) */   8 ,
    /* (  0 , -1 , -1 ) */   9 ,
    /* (  0 , -1 ,  0 ) */   10 , 
    /* (  0 , -1 ,  1 ) */   11 ,
    /* (  0 ,  0 , -1 ) */   12 ,
    /* (  0 ,  0 ,  0 ) */   0 , 
    /* (  0 ,  0 ,  1 ) */   12 ,
    /* (  0 ,  1 , -1 ) */   11 ,
    /* (  0 ,  1 ,  0 ) */   10 , 
    /* (  0 ,  1 ,  1 ) */   9 ,
    /* (  1 , -1 , -1 ) */   8 ,
    /* (  1 , -1 ,  0 ) */   7 , 
    /* (  1 , -1 ,  1 ) */   6 ,
    /* (  1 ,  0 , -1 ) */   5 ,
    /* (  1 ,  0 ,  0 ) */   4 , 
    /* (  1 ,  0 ,  1 ) */   3 ,
    /* (  1 ,  1 , -1 ) */   2 ,
    /* (  1 ,  1 ,  0 ) */   1 , 
    /* (  1 ,  1 ,  1 ) */   0 
    };
78
79
80
81
82
83
84
85
86
87
    
    
/**
 * @brief Mark tasks to be skipped and set the sort flags accordingly.
 * 
 * @return 1 if the space has to be rebuilt, 0 otherwise.
 */
 
int space_marktasks ( struct space *s ) {

88
89
    int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
    struct task *t, *tasks = s->tasks;
90
91
    float dt_step = s->dt_step;
    
92
93
    /* Run through the tasks and mark as skip or not. */
    for ( k = 0 ; k < nr_tasks ; k++ ) {
94
95
    
        /* Get a handle on the kth task. */
96
        t = &tasks[ ind[k] ];
97
        
98
99
        /* Sort-task? Note that due to the task ranking, the sorts
           will all come before the pairs and/or subs. */
100
101
102
103
        if ( t->type == task_type_sort ) {
        
            /* Re-set the flags. */
            t->flags = 0;
104
105
            t->skip = 1;
        
106
107
108
            }
        
        /* Single-cell task? */
109
        else if ( t->type == task_type_self ||
110
111
                  t->type == task_type_ghost ||
                ( t->type == task_type_sub && t->cj == NULL ) ) {
112
113
114
115
116
117
118
119
120
121
122
123
124
             
            /* Set this task's skip. */
            t->skip = ( t->ci->dt_min > dt_step );
            
            }
        
        /* Pair? */
        else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
            
            /* Set this task's skip. */
            t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step );
            
            /* Too much particle movement? */
125
            if ( t->tight &&
126
                 ( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) )
Pedro Gonnet's avatar
Pedro Gonnet committed
127
                return 1;
128
129
                
            /* Set the sort flags. */
130
131
132
133
134
            if ( !t->skip && t->type == task_type_pair ) {
                t->ci->sorts->flags |= (1 << t->flags);
                t->ci->sorts->skip = 0;
                t->cj->sorts->flags |= (1 << t->flags);
                t->cj->sorts->skip = 0;
135
136
137
138
139
140
141
142
143
144
145
146
147
148
                }
                
            }
            
        /* None? */
        else if ( t->type == task_type_none )
            t->skip = 1;
            
        }
        
    /* All is well... */
    return 0;
    
    }
149
150
151
152
153
154
155
156
157
158
159
160


/**
 * @brief Check the integrity of the space and rebuild if necessary.
 *
 * @param s The #space.
 *
 * Runs through the tasks and marks those as "skip" which have no
 * effect for the current @c dt_max. Verifies the integrity of the
 * cell tree for those tasks and triggers a rebuild if necessary.
 */
 
Pedro Gonnet's avatar
Pedro Gonnet committed
161
int space_prepare ( struct space *s ) {
162

163
164
165
166
    int k, rebuild;
    // struct task *t;
    // float dt_step = s->dt_step;
    float dx_max = 0.0f;
167
    // int counts[ task_type_count + 1 ];
168
    ticks tic;
169
    
170
171
172
    /* Get the maximum displacement in the whole system. */
    for ( k = 0 ; k < s->nr_cells ; k++ )
        dx_max = fmaxf( dx_max , s->cells[k].dx_max );
173
    // printf( "space_prepare: dx_max is %e.\n" , dx_max );
174
    
175
    /* Run through the tasks and mark as skip or not. */
176
    // tic = getticks();
177
    rebuild = space_marktasks( s );
178
    // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
179
180
        
    /* Did this not go through? */
181
    if ( rebuild ) {
182
183
    
        /* Re-build the space. */
184
        tic = getticks();
185
        space_rebuild( s , 0.0 );
186
        printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
187
    
188
        /* Run through the tasks and mark as skip or not. */
189
        // tic = getticks();
Pedro Gonnet's avatar
Pedro Gonnet committed
190
191
        if ( space_marktasks( s ) )
            error( "space_marktasks failed after space_rebuild." );
192
        // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
193
        
194
195
196
        }

    
Pedro Gonnet's avatar
Pedro Gonnet committed
197
198
199
    /* Let whoever cares know if we rebuilt. */
    return rebuild;
    
200
201
202
203
204
205
206
207
208
209
210
211
    }
    
    
/** 
 * @brief Sort the tasks in topological order over all queues.
 *
 * @param s The #space.
 */
 
void space_ranktasks ( struct space *s ) {

    int i, j = 0, k, temp, left = 0, rank;
212
213
    struct task *t, *tasks = s->tasks;
    int *tid = s->tasks_ind, nr_tasks = s->nr_tasks;
214
215
    
    /* Run throught the tasks and get all the waits right. */
216
    for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) {
217
        tid[k] = k;
218
219
        for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ )
            tasks[k].unlock_tasks[j]->wait += 1;
220
221
222
        }
        
    /* Main loop. */
223
    for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) {
224
225
        
        /* Load the tids of tasks with no waits. */
226
227
        for ( k = left ; k < nr_tasks ; k++ )
            if ( tasks[ tid[k] ].wait == 0 ) {
228
229
230
                temp = tid[j]; tid[j] = tid[k]; tid[k] = temp;
                j += 1;
                }
231
232
233
234
                
        /* Did we get anything? */
        if ( j == left )
            error( "Unsatisfiable task dependencies detected." );
235

236
        /* Unlock the next layer of tasks. */
237
        for ( i = left ; i < j ; i++ ) {
238
            t = &tasks[ tid[i] ];
239
            t->rank = rank;
240
241
242
            tid[i] = t - tasks;
            if ( tid[i] >= nr_tasks )
                error( "Task index overshoot." );
243
244
245
246
247
248
249
250
251
252
253
254
255
256
            /* printf( "engine_ranktasks: task %i of type %s has rank %i.\n" , i , 
                (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */
            for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
                t->unlock_tasks[k]->wait -= 1;
            }
            
        /* The new left (no, not tony). */
        left = j;
            
        }
        
    }


257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
 
int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift ) {

    int k, sid = 0;
    struct cell *temp;
    double dx[3];

    /* Get the relative distance between the pairs, wrapping. */
    for ( k = 0 ; k < 3 ; k++ ) {
        dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
        if ( dx[k] < -s->dim[k]/2 )
            shift[k] = s->dim[k];
        else if ( dx[k] > s->dim[k]/2 )
            shift[k] = -s->dim[k];
        else
            shift[k] = 0.0;
        dx[k] += shift[k];
        }
        
    /* Get the sorting index. */
    for ( k = 0 ; k < 3 ; k++ )
        sid = 3*sid + ( (dx[k] < 0.0) ? 0 : ( (dx[k] > 0.0) ? 2 : 1 ) );

    /* Switch the cells around? */
    if ( runner_flip[sid] ) {
        temp = *ci; *ci = *cj; *cj = temp;
        for ( k = 0 ; k < 3 ; k++ )
            shift[k] = -shift[k];
        }
    sid = sortlistID[sid];
    
    /* Return the sort ID. */
    return sid;

    }


305
/**
306
 * @brief Recursively dismantle a cell tree.
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
 *
 */
 
void space_rebuild_recycle ( struct space *s , struct cell *c ) {
    
    int k;
    
    if ( c->split )
        for ( k = 0 ; k < 8 ; k++ )
            if ( c->progeny[k] != NULL ) {
                space_rebuild_recycle( s , c->progeny[k] );
                space_recycle( s , c->progeny[k] );
                c->progeny[k] = NULL;
                }
    
    }

/**
325
 * @brief Re-build the cells as well as the tasks.
326
327
 *
 * @param s The #space in which to update the cells.
328
 * @param cell_max Maximal cell size.
329
330
331
 *
 */
 
332
void space_rebuild ( struct space *s , double cell_max ) {
333

334
    float h_max = s->cell_min / kernel_gamma, dmin;
335
    int i, j, k, cdim[3], nr_parts = s->nr_parts;
336
    struct cell *restrict c;
337
338
    struct part *restrict finger, *restrict p, *parts = s->parts;
    struct cpart *restrict cfinger;
339
    int *ind;
340
    // ticks tic;
341
    
342
343
344
    /* Be verbose about this. */
    printf( "space_rebuild: (re)building space...\n" ); fflush(stdout);
    
345
    /* Run through the parts and get the current h_max. */
346
    // tic = getticks();
347
348
349
350
351
352
353
354
355
356
357
358
    if ( s->cells != NULL ) {
        for ( k = 0 ; k < s->nr_cells ; k++ ) {
            if ( s->cells[k].h_max > h_max )
                h_max = s->cells[k].h_max;
            }
        }
    else {
        for ( k = 0 ; k < nr_parts ; k++ ) {
            if ( s->parts[k].h > h_max )
                h_max = s->parts[k].h;
            }
        s->h_max = h_max;
359
        }
360
    // printf( "space_rebuild: h_max is %.3e (cell_max=%.3e).\n" , h_max , cell_max );
361
    // printf( "space_rebuild: getting h_min and h_max took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
362
363
364
    
    /* Get the new putative cell dimensions. */
    for ( k = 0 ; k < 3 ; k++ )
365
        cdim[k] = floor( s->dim[k] / fmax( h_max*kernel_gamma*space_stretch , cell_max ) );
366
367
        
    /* Do we need to re-build the upper-level cells? */
368
    // tic = getticks();
369
370
    if ( s->cells == NULL ||
         cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2] ) {
371
372
373
    
        /* Free the old cells, if they were allocated. */
        if ( s->cells != NULL ) {
374
            for ( k = 0 ; k < s->nr_cells ; k++ ) {
375
                space_rebuild_recycle( s , &s->cells[k] );
376
377
378
                if ( s->cells[k].sort != NULL )
                    free( s->cells[k].sort );
                }
379
380
381
382
            free( s->cells );
            s->maxdepth = 0;
            }
            
383
        /* Set the new cell dimensions only if smaller. */
384
385
386
387
388
        for ( k = 0 ; k < 3 ; k++ ) {
            s->cdim[k] = cdim[k];
            s->h[k] = s->dim[k] / cdim[k];
            s->ih[k] = 1.0 / s->h[k];
            }
389
        dmin = fminf( s->h[0] , fminf( s->h[1] , s->h[2] ) );
390

391
        /* Allocate the highest level of cells. */
Pedro Gonnet's avatar
Pedro Gonnet committed
392
        s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
393
394
395
396
397
398
399
400
401
402
403
404
405
406
        if ( posix_memalign( (void *)&s->cells , 64 , s->nr_cells * sizeof(struct cell) ) != 0 )
            error( "Failed to allocate cells." );
        bzero( s->cells , s->nr_cells * sizeof(struct cell) );
        for ( k = 0 ; k < s->nr_cells ; k++ )
            if ( lock_init( &s->cells[k].lock ) != 0 )
                error( "Failed to init spinlock." );

        /* Set the cell location and sizes. */
        for ( i = 0 ; i < cdim[0] ; i++ )
            for ( j = 0 ; j < cdim[1] ; j++ )
                for ( k = 0 ; k < cdim[2] ; k++ ) {
                    c = &s->cells[ cell_getid( cdim , i , j , k ) ];
                    c->loc[0] = i*s->h[0]; c->loc[1] = j*s->h[1]; c->loc[2] = k*s->h[2];
                    c->h[0] = s->h[0]; c->h[1] = s->h[1]; c->h[2] = s->h[2];
407
                    c->dmin = dmin;
408
                    c->depth = 0;
409
                    lock_init( &c->lock );
410
                    }
411
412
413
           
        /* Be verbose about the change. */         
        printf( "space_rebuild: set cell dimensions to [ %i %i %i ].\n" , cdim[0] , cdim[1] , cdim[2] ); fflush(stdout);
414
415
                    
        } /* re-build upper-level cells? */
416
    // printf( "space_rebuild: rebuilding upper-level cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
417
        
Pedro Gonnet's avatar
Pedro Gonnet committed
418
419
420
421
422
423
424
425
426
427
    /* Otherwise, just clean up the cells. */
    else {
    
        /* Free the old cells, if they were allocated. */
        for ( k = 0 ; k < s->nr_cells ; k++ ) {
            space_rebuild_recycle( s , &s->cells[k] );
            s->cells[k].sorts = NULL;
            s->cells[k].nr_tasks = 0;
            s->cells[k].nr_density = 0;
            s->cells[k].dx_max = 0.0f;
428
429
            s->cells[k].h2dx_max = 0.0f;
            s->cells[k].sorted = 0;
Pedro Gonnet's avatar
Pedro Gonnet committed
430
431
432
433
            }
        s->maxdepth = 0;
    
        }
434
435
        
    /* Run through the particles and get their cell index. */
436
    // tic = getticks();
437
438
    if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
        error( "Failed to allocate temporary particle indices." );
439
440
    for ( k = 0 ; k < s->nr_cells ; k++ )
        s->cells[ k ].count = 0;
441
442
443
    #pragma omp parallel for private(p,j)
    for ( k = 0 ; k < nr_parts ; k++ )  {
        p = &parts[k];
444
445
446
447
448
449
        for ( j = 0 ; j < 3 ; j++ )
            if ( p->x[j] < 0.0 )
                p->x[j] += s->dim[j];
            else if ( p->x[j] >= s->dim[j] )
                p->x[j] -= s->dim[j];
        ind[k] = cell_getid( s->cdim , p->x[0]*s->ih[0] , p->x[1]*s->ih[1] , p->x[2]*s->ih[2] );
450
        atomic_inc( &s->cells[ ind[k] ].count );
451
        }
452
    // printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
453
454

    /* Sort the parts according to their cells. */
455
    // tic = getticks();
456
    parts_sort( parts , ind , s->nr_parts , 0 , s->nr_cells );
457
    // printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
458
459
460
    
    /* We no longer need the indices as of here. */
    free( ind );    
461
462

    /* Hook the cells up to the parts. */
463
    // tic = getticks();
464
465
466
    finger = s->parts;
    cfinger = s->cparts;
    for ( k = 0 ; k < s->nr_cells ; k++ ) {
467
468
469
470
471
472
        c = &s->cells[ k ];
        c->parts = finger;
        c->cparts = cfinger;
        finger = &finger[ c->count ];
        cfinger = &cfinger[ c->count ];
        }
473
    // printf( "space_rebuild: hooking up cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
474
        
475
476
    /* At this point, we have the upper-level cells, old or new. Now make
       sure that the parts in each cell are ok. */
477
    // tic = getticks();
478
    #pragma omp parallel for schedule(dynamic,1) shared(s)
479
    for ( k = 0 ; k < s->nr_cells ; k++ )
Pedro Gonnet's avatar
Pedro Gonnet committed
480
        space_split( s , &s->cells[k] );
481
    // printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
482
        
483
    /* Now that we have the cell structre, re-build the tasks. */
484
    // tic = getticks();
485
    space_maketasks( s , 1 );
486
    // printf( "space_rebuild: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
487
    
488
489
490
    }


491
/**
492
 * @brief Sort the particles and condensed particles according to the given indices.
493
494
495
496
497
498
499
500
501
502
 *
 * @param parts The list of #part
 * @param ind The indices with respect to which the parts are sorted.
 * @param N The number of parts
 * @param min Lowest index.
 * @param max highest index.
 *
 * This function calls itself recursively.
 */
 
503
void parts_sort_rec ( struct part *parts , int *ind , int N , int min , int max ) {
504
505
506
507
508
509

    int pivot = (min + max) / 2;
    int i = 0, j = N-1;
    int temp_i;
    struct part temp_p;
    
510
511
512
513
514
515
    /* If N is small enough, just do insert sort. */
    if ( N < 16 ) {
    
        for ( i = 1 ; i < N ; i++ )
            if ( ind[i] < ind[i-1] ) {
                temp_i = ind[i];
Pedro Gonnet's avatar
Pedro Gonnet committed
516
                temp_p = parts[i];
517
518
519
520
521
522
523
524
                for ( j = i ; j > 0 && ind[j-1] > temp_i ; j-- ) {
                    ind[j] = ind[j-1];
                    parts[j] = parts[j-1];
                    }
                ind[j] = temp_i;
                parts[j] = temp_p;
                }
    
525
526
        }
        
527
528
529
530
531
532
533
534
535
536
537
538
539
    /* Otherwise, recurse with Quicksort. */
    else {
    
        /* One pass of quicksort. */
        while ( i < j ) {
            while ( i < N && ind[i] <= pivot )
                i++;
            while ( j >= 0 && ind[j] > pivot )
                j--;
            if ( i < j ) {
                temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
                temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
                }
540
            }
541
542

        /* Verify sort. */
Pedro Gonnet's avatar
Pedro Gonnet committed
543
        /* for ( int k = 0 ; k <= j ; k++ )
544
545
546
547
548
549
550
551
            if ( ind[k] > pivot ) {
                printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
                error( "Sorting failed (<=pivot)." );
                }
        for ( int k = j+1 ; k < N ; k++ )
            if ( ind[k] <= pivot ) {
                printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
                error( "Sorting failed (>pivot)." );
Pedro Gonnet's avatar
Pedro Gonnet committed
552
                } */
553

Pedro Gonnet's avatar
Pedro Gonnet committed
554
        /* Recurse on the left? */
555
556
        if ( j > 0  && pivot > min ) {
            #pragma omp task untied
Pedro Gonnet's avatar
Pedro Gonnet committed
557
            parts_sort( parts , ind , j+1 , min , pivot );
558
            }
559

Pedro Gonnet's avatar
Pedro Gonnet committed
560
        /* Recurse on the right? */
561
562
        if ( i < N && pivot+1 < max ) {
            #pragma omp task untied
Pedro Gonnet's avatar
Pedro Gonnet committed
563
            parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
564
            }
565
            
566
567
        }
    
568
569
570
    }


571
572
573
574
575
576
577
578
579
580
581
582
void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {

    /* Call the first sort as an OpenMP task. */
    #pragma omp parallel
    {
        #pragma omp single nowait
        parts_sort_rec( parts , ind , N , min , max );
    }
    
    }


Pedro Gonnet's avatar
Pedro Gonnet committed
583
/**
584
 * @brief Mapping function to free the sorted indices buffers.
Pedro Gonnet's avatar
Pedro Gonnet committed
585
586
587
588
589
590
591
592
593
594
595
596
 */

void space_map_clearsort ( struct cell *c , void *data ) {

    if ( c->sort != NULL ) {
        free( c->sort );
        c->sort = NULL;
        }

    }


597
598
/**
 * @brief Mapping function to append a ghost task to each cell.
Pedro Gonnet's avatar
Pedro Gonnet committed
599
600
601
602
 *
 * Looks for the super cell, e.g. the highest-level cell above each
 * cell for which a pair is defined. All ghosts below this cell will
 * depend on the ghost of their parents (sounds spooky, but it isn't).
603
604
 *
 * A kick2-task is appended to each super cell.
605
606
607
608
609
 */

void space_map_mkghosts ( struct cell *c , void *data ) {

    struct space *s = (struct space *)data;
Pedro Gonnet's avatar
Pedro Gonnet committed
610
    struct cell *finger;
611

Pedro Gonnet's avatar
Pedro Gonnet committed
612
613
614
615
616
617
618
    /* Find the super cell, i.e. the highest cell hierarchically above
       this one to still have at least one task associated with it. */
    c->super = c;
    for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
        if ( finger->nr_tasks > 0 )
            c->super = finger;
            
Pedro Gonnet's avatar
Pedro Gonnet committed
619
    /* Make the ghost task */
Pedro Gonnet's avatar
Pedro Gonnet committed
620
    if ( c->super != c || c->nr_tasks > 0 )
621
        c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 );
Pedro Gonnet's avatar
Pedro Gonnet committed
622

623
624
625
626
    /* Append a kick task if we are the active super cell. */
    if ( c->super == c && c->nr_tasks > 0 )
        c->kick2 = space_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 );
    
Pedro Gonnet's avatar
Pedro Gonnet committed
627
628
629
630
    /* If we are not the super cell ourselves, make our ghost depend
       on our parent cell. */
    if ( c->super != c )
        task_addunlock( c->parent->ghost , c->ghost );
631
        
Pedro Gonnet's avatar
Pedro Gonnet committed
632
633
634
    }


Pedro Gonnet's avatar
Pedro Gonnet committed
635
636
637
638
/**
 * @brief Map a function to all particles in a aspace.
 *
 * @param s The #space we are working in.
639
640
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
Pedro Gonnet's avatar
Pedro Gonnet committed
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
 */
 
void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct cell *c , void *data ) , void *data ) {

    int i;

    void rec_map ( struct cell *c ) {
    
        int k;
        
        /* No progeny? */
        if ( !c->split )
            for ( k = 0 ; k < c->count ; k++ )
                fun( &c->parts[k] , c , data );
                
        /* Otherwise, recurse. */
        else
            for ( k = 0 ; k < 8 ; k++ )
                if ( c->progeny[k] != NULL )
                    rec_map( c->progeny[k] );
                
        }
        
    /* Call the recursive function on all higher-level cells. */
665
    #pragma omp parallel for schedule(dynamic,1)
Pedro Gonnet's avatar
Pedro Gonnet committed
666
667
668
669
670
671
672
673
674
675
    for ( i = 0 ; i < s->nr_cells ; i++ )
        rec_map( &s->cells[i] );

    }


/**
 * @brief Map a function to all particles in a aspace.
 *
 * @param s The #space we are working in.
676
 * @param full Map to all cells, including cells with sub-cells.
677
678
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
Pedro Gonnet's avatar
Pedro Gonnet committed
679
680
 */
 
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ) {

    int i;

    void rec_map ( struct cell *c ) {
    
        int k;
        
        /* Recurse. */
        if ( c->split )
            for ( k = 0 ; k < 8 ; k++ )
                if ( c->progeny[k] != NULL )
                    rec_map( c->progeny[k] );
                
        /* No progeny? */
        if ( full || !c->split )
            fun( c , data );
                
        }
        
    /* Call the recursive function on all higher-level cells. */
702
    #pragma omp parallel for schedule(dynamic,1)
703
704
705
706
707
708
709
    for ( i = 0 ; i < s->nr_cells ; i++ )
        rec_map( &s->cells[i] );

    }


void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ) {
Pedro Gonnet's avatar
Pedro Gonnet committed
710
711
712
713
714
715
716
717

    int i;

    void rec_map ( struct cell *c ) {
    
        int k;
        
        /* No progeny? */
718
        if ( full || !c->split )
Pedro Gonnet's avatar
Pedro Gonnet committed
719
720
            fun( c , data );
                
721
722
        /* Recurse. */
        if ( c->split )
Pedro Gonnet's avatar
Pedro Gonnet committed
723
724
725
726
727
728
729
            for ( k = 0 ; k < 8 ; k++ )
                if ( c->progeny[k] != NULL )
                    rec_map( c->progeny[k] );
                
        }
        
    /* Call the recursive function on all higher-level cells. */
730
    #pragma omp parallel for schedule(dynamic,1)
Pedro Gonnet's avatar
Pedro Gonnet committed
731
732
733
734
735
736
737
738
739
740
    for ( i = 0 ; i < s->nr_cells ; i++ )
        rec_map( &s->cells[i] );

    }


/**
 * @brief Add a #task to the #space.
 *
 * @param s The #space we are working in.
741
742
743
744
745
746
747
 * @param type The type of the task.
 * @param subtype The sub-type of the task.
 * @param flags The flags of the task.
 * @param wait 
 * @param ci The first cell to interact.
 * @param cj The second cell to interact.
 * @param tight
Pedro Gonnet's avatar
Pedro Gonnet committed
748
749
 */
 
750
struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ) {
Pedro Gonnet's avatar
Pedro Gonnet committed
751

752
    int ind;
753
754
755
    struct task *t;
    
    /* Get the next free task. */
756
757
    ind = atomic_inc( &s->tasks_next );
    t = &s->tasks[ ind ];
Pedro Gonnet's avatar
Pedro Gonnet committed
758
759
760
    
    /* Copy the data. */
    t->type = type;
761
    t->subtype = subtype;
Pedro Gonnet's avatar
Pedro Gonnet committed
762
763
764
765
    t->flags = flags;
    t->wait = wait;
    t->ci = ci;
    t->cj = cj;
766
    t->skip = 0;
767
    t->tight = tight;
768
    t->nr_unlock_tasks = 0;
Pedro Gonnet's avatar
Pedro Gonnet committed
769
    
770
771
772
773
    /* Init the lock. */
    lock_init( &t->lock );
    
    /* Add an index for it. */
774
775
    lock_lock( &s->lock );
    s->tasks_ind[ s->nr_tasks ] = ind;
Pedro Gonnet's avatar
Pedro Gonnet committed
776
    s->nr_tasks += 1;
777
778
    lock_unlock_blind( &s->lock );
    
Pedro Gonnet's avatar
Pedro Gonnet committed
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
    /* Return a pointer to the new task. */
    return t;

    }



/**
 * @brief Split tasks that may be too large.
 *
 * @param s The #space we are working in.
 */
 
void space_splittasks ( struct space *s ) {

794
    int j, k, ind, sid, tid = 0, redo;
Pedro Gonnet's avatar
Pedro Gonnet committed
795
796
    struct cell *ci, *cj;
    double hi, hj, shift[3];
797
    struct task *t, *t_old;
798
    // float dt_step = s->dt_step;
799
800
801
802
803
    int pts[7][8] = { { -1 , 12 , 10 ,  9 ,  4 ,  3 ,  1 ,  0 } ,
                      { -1 , -1 , 11 , 10 ,  5 ,  4 ,  2 ,  1 } ,
                      { -1 , -1 , -1 , 12 ,  7 ,  6 ,  4 ,  3 } , 
                      { -1 , -1 , -1 , -1 ,  8 ,  7 ,  5 ,  4 } ,
                      { -1 , -1 , -1 , -1 , -1 , 12 , 10 ,  9 } ,
804
805
                      { -1 , -1 , -1 , -1 , -1 , -1 , 11 , 10 } ,
                      { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } };
Pedro Gonnet's avatar
Pedro Gonnet committed
806
807

    /* Loop through the tasks... */
Pedro Gonnet's avatar
Pedro Gonnet committed
808
    // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift)
809
810
    {
    redo = 0; t_old = t = NULL;
Pedro Gonnet's avatar
Pedro Gonnet committed
811
    while ( 1 ) {
Pedro Gonnet's avatar
Pedro Gonnet committed
812
813
    
        /* Get a pointer on the task. */
814
815
816
817
818
        if ( redo ) {
            redo = 0;
            t = t_old;
            }
        else {
Pedro Gonnet's avatar
Pedro Gonnet committed
819
820
821
            if ( ( ind = atomic_inc( &tid ) ) < s->nr_tasks )
                t_old = t = &s->tasks[ s->tasks_ind[ ind ] ];
            else
822
823
                break;
            }
824
        
825
826
827
828
829
830
831
        /* Empty task? */
        if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) {
            t->type = task_type_none;
            t->skip = 1;
            continue;
            }
        
832
833
834
835
836
        /* Self-interaction? */
        if ( t->type == task_type_self ) {
        
            /* Get a handle on the cell involved. */
            ci = t->ci;
Pedro Gonnet's avatar
Pedro Gonnet committed
837
            
838
            /* Ingore this task? */
839
            /* if ( ci->dt_min > dt_step ) {
840
                t->skip = 1;
841
                continue;
842
                } */
843
            
844
            /* Is this cell even split? */
Pedro Gonnet's avatar
Pedro Gonnet committed
845
            if ( ci->split ) {
Pedro Gonnet's avatar
Pedro Gonnet committed
846
            
847
            /* Make a sub? */
848
            if ( space_dosub && ci->count < space_subsize ) {
849
850
851
852
853
854
855
856
857
858
            
                /* convert to a self-subtask. */
                t->type = task_type_sub;
                
                }
                
            /* Otherwise, make tasks explicitly. */
            else {
            
                /* Take a step back (we're going to recycle the current task)... */
859
                redo = 1;
860
861
862
863
864
865

                /* Add the self taks. */
                for ( k = 0 ; ci->progeny[k] == NULL ; k++ );
                t->ci = ci->progeny[k];
                for ( k += 1 ; k < 8 ; k++ )
                    if ( ci->progeny[k] != NULL )
866
                        space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , 0 );
867
868
869
            
                /* Make a task for each pair of progeny. */
                for ( j = 0 ; j < 8 ; j++ )
870
                    if ( ci->progeny[j] != NULL )
871
                        for ( k = j + 1 ; k < 8 ; k++ )
872
                            if ( ci->progeny[k] != NULL )
873
                                space_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , 0 );
Pedro Gonnet's avatar
Pedro Gonnet committed
874
                }
Pedro Gonnet's avatar
Pedro Gonnet committed
875
876

                }
877
878
879
880
881
882
883
884
885
        
            }
    
        /* Pair interaction? */
        else if ( t->type == task_type_pair ) {
            
            /* Get a handle on the cells involved. */
            ci = t->ci;
            cj = t->cj;
886
887
            hi = ci->dmin;
            hj = cj->dmin;
888

889
            /* Ingore this task? */
890
            /* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) {
891
                t->skip = 1;
892
                continue;
893
                } */
894
895
896
897
898
            
            /* Get the sort ID, use space_getsid and not t->flags
               to make sure we get ci and cj swapped if needed. */
            sid = space_getsid( s , &ci , &cj , shift );
                
899
900
            /* Should this task be split-up? */
            if ( ci->split && cj->split &&
901
902
                 ci->h_max*kernel_gamma*space_stretch < hi/2 &&
                 cj->h_max*kernel_gamma*space_stretch < hj/2 ) {
903
904
                 
                /* Replace by a single sub-task? */
905
906
                if ( space_dosub &&
                     ci->count < space_subsize && cj->count < space_subsize &&
907
                     sid != 0 && sid != 2 && sid != 6 && sid != 8 ) {
Pedro Gonnet's avatar
Pedro Gonnet committed
908
                
909
910
                    /* Make this task a sub task. */
                    t->type = task_type_sub;
Pedro Gonnet's avatar
Pedro Gonnet committed
911

912
                    }
Pedro Gonnet's avatar
Pedro Gonnet committed
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
                    
                /* Otherwise, split it. */
                else {

                    /* Take a step back (we're going to recycle the current task)... */
                    redo = 1;

                    /* For each different sorting type... */
                    switch ( sid ) {

                        case 0: /* (  1 ,  1 ,  1 ) */
                            t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0;
                            break;

                        case 1: /* (  1 ,  1 ,  0 ) */
                            t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
                            break;

                        case 2: /* (  1 ,  1 , -1 ) */
                            t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; t->tight = 1;
                            break;

                        case 3: /* (  1 ,  0 ,  1 ) */
                            t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
                            break;

                        case 4: /* (  1 ,  0 ,  0 ) */
                            t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , 1 );
                            break;

                        case 5: /* (  1 ,  0 , -1 ) */
                            t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
                            break;

                        case 6: /* (  1 , -1 ,  1 ) */
                            t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; t->tight = 1;
                            break;

                        case 7: /* (  1 , -1 ,  0 ) */
                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
                            break;

                        case 8: /* (  1 , -1 , -1 ) */
                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; t->tight = 1;
                            break;

                        case 9: /* (  0 ,  1 ,  1 ) */
                            t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
                            break;

                        case 10: /* (  0 ,  1 ,  0 ) */
                            t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; t->tight = 1;
                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , 1 );
                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
For faster browsing, not all history is shown. View entire blame