scheduler.c 54.1 KB
Newer Older
1
2
/*******************************************************************************
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
5
 *               2016 Peter W. Draper (p.w.draper@durham.ac.uk)
6
 *
7
8
9
10
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
11
 *
12
13
14
15
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
16
 *
17
18
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
 *
20
21
22
23
24
25
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
26
27
28
#include <limits.h>
#include <math.h>
#include <pthread.h>
29
30
31
32
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

33
34
/* MPI headers. */
#ifdef WITH_MPI
35
#include <mpi.h>
36
37
#endif

38
39
40
/* This object's header. */
#include "scheduler.h"

41
42
/* Local headers. */
#include "atomic.h"
43
#include "cycle.h"
44
#include "engine.h"
45
#include "error.h"
46
#include "intrinsics.h"
47
#include "kernel_hydro.h"
48
#include "queue.h"
49
#include "sort_part.h"
50
51
#include "space.h"
#include "task.h"
52
#include "timers.h"
53

54
55
56
57
58
/**
 * @brief Re-set the list of active tasks.
 */
void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; }

59
60
61
62
63
64
65
/**
 * @brief Add an unlock_task to the given task.
 *
 * @param s The #scheduler.
 * @param ta The unlocking #task.
 * @param tb The #task that will be unlocked.
 */
66
67
void scheduler_addunlock(struct scheduler *s, struct task *ta,
                         struct task *tb) {
68
69
70
71
72
#ifdef SWIFT_DEBUG_CHECKS
  if (ta == NULL) error("Unlocking task is NULL.");
  if (tb == NULL) error("Unlocked task is NULL.");
#endif

73
74
75
76
77
  /* Get an index at which to store this unlock. */
  const int ind = atomic_inc(&s->nr_unlocks);

  /* Does the buffer need to be grown? */
  if (ind == s->size_unlocks) {
78
    /* Allocate the new buffer. */
79
80
81
    struct task **unlocks_new;
    int *unlock_ind_new;
    const int size_unlocks_new = s->size_unlocks * 2;
82
83
    if ((unlocks_new = (struct task **)malloc(sizeof(struct task *) *
                                              size_unlocks_new)) == NULL ||
84
85
        (unlock_ind_new = (int *)malloc(sizeof(int) * size_unlocks_new)) ==
            NULL)
86
      error("Failed to re-allocate unlocks.");
87

88
    /* Wait for all writes to the old buffer to complete. */
89
90
91
    while (s->completed_unlock_writes < ind)
      ;

92
    /* Copy the buffers. */
93
94
95
96
97
98
    memcpy(unlocks_new, s->unlocks, sizeof(struct task *) * ind);
    memcpy(unlock_ind_new, s->unlock_ind, sizeof(int) * ind);
    free(s->unlocks);
    free(s->unlock_ind);
    s->unlocks = unlocks_new;
    s->unlock_ind = unlock_ind_new;
99

100
    /* Publish the new buffer size. */
101
102
    s->size_unlocks = size_unlocks_new;
  }
103

104
  /* Wait for there to actually be space at my index. */
105
106
  while (ind > s->size_unlocks)
    ;
107
108
109
110

  /* Write the unlock to the scheduler. */
  s->unlocks[ind] = tb;
  s->unlock_ind[ind] = ta - s->tasks;
111
  atomic_inc(&s->completed_unlock_writes);
112
113
}

114
/**
115
 * @brief Split a hydrodynamic task if too large.
116
 *
117
118
 * @param t The #task
 * @param s The #scheduler we are working in.
119
 */
120
static void scheduler_splittask_hydro(struct task *t, struct scheduler *s) {
121

122
123
124
  /* Iterate on this task until we're done with it. */
  int redo = 1;
  while (redo) {
125

126
127
    /* Reset the redo flag. */
    redo = 0;
128

129
    /* Non-splittable task? */
130
131
    if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL) ||
        t->ci->count == 0 || (t->cj != NULL && t->cj->count == 0)) {
132
      t->type = task_type_none;
133
134
      t->subtype = task_subtype_none;
      t->cj = NULL;
135
136
137
      t->skip = 1;
      break;
    }
138

139
140
    /* Self-interaction? */
    if (t->type == task_type_self) {
141

142
143
144
145
146
      /* Get a handle on the cell involved. */
      struct cell *ci = t->ci;

      /* Foreign task? */
      if (ci->nodeID != s->nodeID) {
147
        t->skip = 1;
148
        break;
149
150
      }

151
      /* Is this cell even split and the task does not violate h ? */
152
      if (cell_can_split_self_task(ci)) {
153

154
        /* Make a sub? */
155
        if (scheduler_dosub && ci->count < space_subsize_self) {
156

157
158
159
160
161
          /* convert to a self-subtask. */
          t->type = task_type_sub_self;

          /* Otherwise, make tasks explicitly. */
        } else {
162

163
164
165
166
167
168
169
170
          /* Take a step back (we're going to recycle the current task)... */
          redo = 1;

          /* Add the self tasks. */
          int first_child = 0;
          while (ci->progeny[first_child] == NULL) first_child++;
          t->ci = ci->progeny[first_child];
          for (int k = first_child + 1; k < 8; k++)
171
            if (ci->progeny[k] != NULL && ci->progeny[k]->count)
172
              scheduler_splittask_hydro(
173
                  scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
174
                                    ci->progeny[k], NULL),
175
176
                  s);

177
178
          /* Make a task for each pair of progeny */
          for (int j = 0; j < 8; j++)
179
            if (ci->progeny[j] != NULL && ci->progeny[j]->count)
180
              for (int k = j + 1; k < 8; k++)
181
                if (ci->progeny[k] != NULL && ci->progeny[k]->count)
182
183
                  scheduler_splittask_hydro(
                      scheduler_addtask(s, task_type_pair, t->subtype,
184
                                        sub_sid_flag[j][k], 0, ci->progeny[j],
185
186
                                        ci->progeny[k]),
                      s);
187
        }
188
      } /* Cell is split */
189

190
    } /* Self interaction */
191

192
193
    /* Pair interaction? */
    else if (t->type == task_type_pair) {
194

195
196
197
      /* Get a handle on the cells involved. */
      struct cell *ci = t->ci;
      struct cell *cj = t->cj;
198

199
200
201
202
203
      /* Foreign task? */
      if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
        t->skip = 1;
        break;
      }
204

205
206
207
      /* Get the sort ID, use space_getsid and not t->flags
         to make sure we get ci and cj swapped if needed. */
      double shift[3];
Matthieu Schaller's avatar
Matthieu Schaller committed
208
      const int sid = space_getsid(s->space, &ci, &cj, shift);
209

210
      /* Should this task be split-up? */
211
      if (cell_can_split_pair_task(ci) && cell_can_split_pair_task(cj)) {
212
213

        /* Replace by a single sub-task? */
214
215
        if (scheduler_dosub && /* Use division to avoid integer overflow. */
            ci->count * sid_scale[sid] < space_subsize_pair / cj->count &&
216
            !sort_is_corner(sid)) {
217
218
219
220
221

          /* Make this task a sub task. */
          t->type = task_type_sub_pair;

          /* Otherwise, split it. */
222
223
        } else {

224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
          /* Take a step back (we're going to recycle the current task)... */
          redo = 1;

          /* For each different sorting type... */
          switch (sid) {

            case 0: /* (  1 ,  1 ,  1 ) */
              t->ci = ci->progeny[7];
              t->cj = cj->progeny[0];
              t->flags = 0;
              break;

            case 1: /* (  1 ,  1 ,  0 ) */
              t->ci = ci->progeny[6];
              t->cj = cj->progeny[0];
              t->flags = 1;
240
              scheduler_splittask_hydro(
241
                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
242
                                    ci->progeny[7], cj->progeny[1]),
243
                  s);
244
              scheduler_splittask_hydro(
245
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
246
                                    ci->progeny[6], cj->progeny[1]),
247
                  s);
248
              scheduler_splittask_hydro(
249
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
250
                                    ci->progeny[7], cj->progeny[0]),
251
252
253
254
255
256
257
258
259
260
261
262
263
                  s);
              break;

            case 2: /* (  1 ,  1 , -1 ) */
              t->ci = ci->progeny[6];
              t->cj = cj->progeny[1];
              t->flags = 2;
              break;

            case 3: /* (  1 ,  0 ,  1 ) */
              t->ci = ci->progeny[5];
              t->cj = cj->progeny[0];
              t->flags = 3;
264
              scheduler_splittask_hydro(
265
                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
266
                                    ci->progeny[7], cj->progeny[2]),
267
                  s);
268
              scheduler_splittask_hydro(
269
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
270
                                    ci->progeny[5], cj->progeny[2]),
271
                  s);
272
              scheduler_splittask_hydro(
273
                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
274
                                    ci->progeny[7], cj->progeny[0]),
275
276
277
278
279
280
281
                  s);
              break;

            case 4: /* (  1 ,  0 ,  0 ) */
              t->ci = ci->progeny[4];
              t->cj = cj->progeny[0];
              t->flags = 4;
282
              scheduler_splittask_hydro(
283
                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
284
                                    ci->progeny[5], cj->progeny[0]),
285
                  s);
286
              scheduler_splittask_hydro(
287
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
288
                                    ci->progeny[6], cj->progeny[0]),
289
                  s);
290
              scheduler_splittask_hydro(
291
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
292
                                    ci->progeny[7], cj->progeny[0]),
293
                  s);
294
              scheduler_splittask_hydro(
295
                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
296
                                    ci->progeny[4], cj->progeny[1]),
297
                  s);
298
              scheduler_splittask_hydro(
299
                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
300
                                    ci->progeny[5], cj->progeny[1]),
301
                  s);
302
              scheduler_splittask_hydro(
303
                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
304
                                    ci->progeny[6], cj->progeny[1]),
305
                  s);
306
              scheduler_splittask_hydro(
307
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
308
                                    ci->progeny[7], cj->progeny[1]),
309
                  s);
310
              scheduler_splittask_hydro(
311
                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
312
                                    ci->progeny[4], cj->progeny[2]),
313
                  s);
314
              scheduler_splittask_hydro(
315
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
316
                                    ci->progeny[5], cj->progeny[2]),
317
                  s);
318
              scheduler_splittask_hydro(
319
                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
320
                                    ci->progeny[6], cj->progeny[2]),
321
                  s);
322
              scheduler_splittask_hydro(
323
                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
324
                                    ci->progeny[7], cj->progeny[2]),
325
                  s);
326
              scheduler_splittask_hydro(
327
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
328
                                    ci->progeny[4], cj->progeny[3]),
329
                  s);
330
              scheduler_splittask_hydro(
331
                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
332
                                    ci->progeny[5], cj->progeny[3]),
333
                  s);
334
              scheduler_splittask_hydro(
335
                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
336
                                    ci->progeny[6], cj->progeny[3]),
337
                  s);
338
              scheduler_splittask_hydro(
339
                  scheduler_addtask(s, task_type_pair, t->subtype, 4, 0,
340
                                    ci->progeny[7], cj->progeny[3]),
341
342
343
344
345
346
347
                  s);
              break;

            case 5: /* (  1 ,  0 , -1 ) */
              t->ci = ci->progeny[4];
              t->cj = cj->progeny[1];
              t->flags = 5;
348
              scheduler_splittask_hydro(
349
                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
350
                                    ci->progeny[6], cj->progeny[3]),
351
                  s);
352
              scheduler_splittask_hydro(
353
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
354
                                    ci->progeny[4], cj->progeny[3]),
355
                  s);
356
              scheduler_splittask_hydro(
357
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
358
                                    ci->progeny[6], cj->progeny[1]),
359
360
361
362
363
364
365
366
367
368
369
370
371
                  s);
              break;

            case 6: /* (  1 , -1 ,  1 ) */
              t->ci = ci->progeny[5];
              t->cj = cj->progeny[2];
              t->flags = 6;
              break;

            case 7: /* (  1 , -1 ,  0 ) */
              t->ci = ci->progeny[4];
              t->cj = cj->progeny[3];
              t->flags = 6;
372
              scheduler_splittask_hydro(
373
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
374
                                    ci->progeny[5], cj->progeny[2]),
375
                  s);
376
              scheduler_splittask_hydro(
377
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
378
                                    ci->progeny[4], cj->progeny[2]),
379
                  s);
380
              scheduler_splittask_hydro(
381
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
382
                                    ci->progeny[5], cj->progeny[3]),
383
384
385
386
387
388
389
390
391
392
393
394
395
                  s);
              break;

            case 8: /* (  1 , -1 , -1 ) */
              t->ci = ci->progeny[4];
              t->cj = cj->progeny[3];
              t->flags = 8;
              break;

            case 9: /* (  0 ,  1 ,  1 ) */
              t->ci = ci->progeny[3];
              t->cj = cj->progeny[0];
              t->flags = 9;
396
              scheduler_splittask_hydro(
397
                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
398
                                    ci->progeny[7], cj->progeny[4]),
399
                  s);
400
              scheduler_splittask_hydro(
401
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
402
                                    ci->progeny[3], cj->progeny[4]),
403
                  s);
404
              scheduler_splittask_hydro(
405
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
406
                                    ci->progeny[7], cj->progeny[0]),
407
408
409
410
411
412
413
                  s);
              break;

            case 10: /* (  0 ,  1 ,  0 ) */
              t->ci = ci->progeny[2];
              t->cj = cj->progeny[0];
              t->flags = 10;
414
              scheduler_splittask_hydro(
415
                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
416
                                    ci->progeny[3], cj->progeny[0]),
417
                  s);
418
              scheduler_splittask_hydro(
419
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
420
                                    ci->progeny[6], cj->progeny[0]),
421
                  s);
422
              scheduler_splittask_hydro(
423
                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
424
                                    ci->progeny[7], cj->progeny[0]),
425
                  s);
426
              scheduler_splittask_hydro(
427
                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
428
                                    ci->progeny[2], cj->progeny[1]),
429
                  s);
430
              scheduler_splittask_hydro(
431
                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
432
                                    ci->progeny[3], cj->progeny[1]),
433
                  s);
434
              scheduler_splittask_hydro(
435
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
436
                                    ci->progeny[6], cj->progeny[1]),
437
                  s);
438
              scheduler_splittask_hydro(
439
                  scheduler_addtask(s, task_type_pair, t->subtype, 7, 0,
440
                                    ci->progeny[7], cj->progeny[1]),
441
                  s);
442
              scheduler_splittask_hydro(
443
                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
444
                                    ci->progeny[2], cj->progeny[4]),
445
                  s);
446
              scheduler_splittask_hydro(
447
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
448
                                    ci->progeny[3], cj->progeny[4]),
449
                  s);
450
              scheduler_splittask_hydro(
451
                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
452
                                    ci->progeny[6], cj->progeny[4]),
453
                  s);
454
              scheduler_splittask_hydro(
455
                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
456
                                    ci->progeny[7], cj->progeny[4]),
457
                  s);
458
              scheduler_splittask_hydro(
459
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
460
                                    ci->progeny[2], cj->progeny[5]),
461
                  s);
462
              scheduler_splittask_hydro(
463
                  scheduler_addtask(s, task_type_pair, t->subtype, 1, 0,
464
                                    ci->progeny[3], cj->progeny[5]),
465
                  s);
466
              scheduler_splittask_hydro(
467
                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
468
                                    ci->progeny[6], cj->progeny[5]),
469
                  s);
470
              scheduler_splittask_hydro(
471
                  scheduler_addtask(s, task_type_pair, t->subtype, 10, 0,
472
                                    ci->progeny[7], cj->progeny[5]),
473
474
475
476
477
478
479
                  s);
              break;

            case 11: /* (  0 ,  1 , -1 ) */
              t->ci = ci->progeny[2];
              t->cj = cj->progeny[1];
              t->flags = 11;
480
              scheduler_splittask_hydro(
481
                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
482
                                    ci->progeny[6], cj->progeny[5]),
483
                  s);
484
              scheduler_splittask_hydro(
485
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
486
                                    ci->progeny[2], cj->progeny[5]),
487
                  s);
488
              scheduler_splittask_hydro(
489
                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
490
                                    ci->progeny[6], cj->progeny[1]),
491
492
493
494
495
496
497
                  s);
              break;

            case 12: /* (  0 ,  0 ,  1 ) */
              t->ci = ci->progeny[1];
              t->cj = cj->progeny[0];
              t->flags = 12;
498
              scheduler_splittask_hydro(
499
                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
500
                                    ci->progeny[3], cj->progeny[0]),
501
                  s);
502
              scheduler_splittask_hydro(
503
                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
504
                                    ci->progeny[5], cj->progeny[0]),
505
                  s);
506
              scheduler_splittask_hydro(
507
                  scheduler_addtask(s, task_type_pair, t->subtype, 2, 0,
508
                                    ci->progeny[7], cj->progeny[0]),
509
                  s);
510
              scheduler_splittask_hydro(
511
                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
512
                                    ci->progeny[1], cj->progeny[2]),
513
                  s);
514
              scheduler_splittask_hydro(
515
                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
516
                                    ci->progeny[3], cj->progeny[2]),
517
                  s);
518
              scheduler_splittask_hydro(
519
                  scheduler_addtask(s, task_type_pair, t->subtype, 8, 0,
520
                                    ci->progeny[5], cj->progeny[2]),
521
                  s);
522
              scheduler_splittask_hydro(
523
                  scheduler_addtask(s, task_type_pair, t->subtype, 5, 0,
524
                                    ci->progeny[7], cj->progeny[2]),
525
                  s);
526
              scheduler_splittask_hydro(
527
                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
528
                                    ci->progeny[1], cj->progeny[4]),
529
                  s);
530
              scheduler_splittask_hydro(
531
                  scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
532
                                    ci->progeny[3], cj->progeny[4]),
533
                  s);
534
              scheduler_splittask_hydro(
535
                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
536
                                    ci->progeny[5], cj->progeny[4]),
537
                  s);
538
              scheduler_splittask_hydro(
539
                  scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
540
                                    ci->progeny[7], cj->progeny[4]),
541
                  s);
542
              scheduler_splittask_hydro(
543
                  scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
544
                                    ci->progeny[1], cj->progeny[6]),
545
                  s);
546
              scheduler_splittask_hydro(
547
                  scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
548
                                    ci->progeny[3], cj->progeny[6]),
549
                  s);
550
              scheduler_splittask_hydro(
551
                  scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
552
                                    ci->progeny[5], cj->progeny[6]),
553
                  s);
554
              scheduler_splittask_hydro(
555
                  scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
556
                                    ci->progeny[7], cj->progeny[6]),
557
558
559
                  s);
              break;
          } /* switch(sid) */
560
561
        }

562
563
564
565
566
567
568
569
570
571
572
        /* Otherwise, break it up if it is too large? */
      } else if (scheduler_doforcesplit && ci->split && cj->split &&
                 (ci->count > space_maxsize / cj->count)) {

        // message( "force splitting pair with %i and %i parts." , ci->count ,
        // cj->count );

        /* Replace the current task. */
        t->type = task_type_none;

        for (int j = 0; j < 8; j++)
573
          if (ci->progeny[j] != NULL && ci->progeny[j]->count)
574
            for (int k = 0; k < 8; k++)
575
              if (cj->progeny[k] != NULL && cj->progeny[k]->count) {
576
577
                struct task *tl =
                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
578
                                      ci->progeny[j], cj->progeny[k]);
579
                scheduler_splittask_hydro(tl, s);
580
581
582
583
584
585
586
                tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift);
              }
      }
    } /* pair interaction? */
  }   /* iterate over the current task. */
}

587
588
589
590
591
592
/**
 * @brief Split a gravity task if too large.
 *
 * @param t The #task
 * @param s The #scheduler we are working in.
 */
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) {

  /* Iterate on this task until we're done with it. */
  int redo = 1;
  while (redo) {

    /* Reset the redo flag. */
    redo = 0;

    /* Non-splittable task? */
    if ((t->ci == NULL) || (t->type == task_type_pair && t->cj == NULL)) {
      t->type = task_type_none;
      t->subtype = task_subtype_none;
      t->cj = NULL;
      t->skip = 1;
      break;
    }

    /* Self-interaction? */
    if (t->type == task_type_self) {

      /* Get a handle on the cell involved. */
      struct cell *ci = t->ci;

      /* Foreign task? */
      if (ci->nodeID != s->nodeID) {
        t->skip = 1;
        break;
      }

623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
      /* Should we split this task? */
      if (ci->split && ci->gcount > space_subsize_self_grav) {

        /* Take a step back (we're going to recycle the current task)... */
        redo = 1;

        /* Add the self tasks. */
        int first_child = 0;
        while (ci->progeny[first_child] == NULL) first_child++;
        t->ci = ci->progeny[first_child];
        for (int k = first_child + 1; k < 8; k++)
          if (ci->progeny[k] != NULL)
            scheduler_splittask_gravity(
                scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
                                  ci->progeny[k], NULL),
                s);

        /* Make a task for each pair of progeny */
        if (t->subtype != task_subtype_external_grav) {
          for (int j = 0; j < 8; j++)
            if (ci->progeny[j] != NULL)
              for (int k = j + 1; k < 8; k++)
                if (ci->progeny[k] != NULL)
                  scheduler_splittask_gravity(
                      scheduler_addtask(s, task_type_pair, t->subtype,
                                        sub_sid_flag[j][k], 0, ci->progeny[j],
                                        ci->progeny[k]),
                      s);
651
        }
652
      } /* Cell is split */
653
654
655
656
657

    } /* Self interaction */

    /* Pair interaction? */
    else if (t->type == task_type_pair) {
658
659
660

      /* Get a handle on the cells involved. */
      struct cell *ci = t->ci;
661
662
663
664
665
666
667
668
      struct cell *cj = t->cj;

      /* Foreign task? */
      if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) {
        t->skip = 1;
        break;
      }
    } /* pair interaction? */
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
  }   /* iterate over the current task. */
}

/**
 * @brief Mapper function to split tasks that may be too large.
 *
 * @param map_data the tasks to process
 * @param num_elements the number of tasks.
 * @param extra_data The #scheduler we are working in.
 */
void scheduler_splittasks_mapper(void *map_data, int num_elements,
                                 void *extra_data) {

  /* Extract the parameters. */
  struct scheduler *s = (struct scheduler *)extra_data;
  struct task *tasks = (struct task *)map_data;
685

686
687
  for (int ind = 0; ind < num_elements; ind++) {
    struct task *t = &tasks[ind];
688
689
690
691
692
693
694
695
696
697

    /* Invoke the correct splitting strategy */
    if (t->subtype == task_subtype_density) {
      scheduler_splittask_hydro(t, s);
    } else if (t->subtype == task_subtype_external_grav) {
      scheduler_splittask_gravity(t, s);
    } else if (t->subtype == task_subtype_grav) {
      scheduler_splittask_gravity(t, s);
    } else if (t->type == task_type_grav_top_level ||
               t->type == task_type_grav_ghost) {
698
      /* For future use */
699
700
701
    } else {
      error("Unexpected task sub-type");
    }
702
  }
703
}
704

Matthieu Schaller's avatar
Matthieu Schaller committed
705
706
707
708
709
/**
 * @brief Splits all the tasks in the scheduler that are too large.
 *
 * @param s The #scheduler.
 */
710
void scheduler_splittasks(struct scheduler *s) {
711

712
713
  /* Call the mapper on each current task. */
  threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks,
714
                 s->nr_tasks, sizeof(struct task), 0, s);
715
716
}

717
718
719
720
721
722
723
/**
 * @brief Add a #task to the #scheduler.
 *
 * @param s The #scheduler we are working in.
 * @param type The type of the task.
 * @param subtype The sub-type of the task.
 * @param flags The flags of the task.
724
725
 * @param implicit If true, only use this task to unlock dependencies, i.e.
 *        this task is never enqueued.
726
727
728
 * @param ci The first cell to interact.
 * @param cj The second cell to interact.
 */
729
struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
730
731
                               enum task_subtypes subtype, int flags,
                               int implicit, struct cell *ci, struct cell *cj) {
732

733
734
735
736
737
738
#ifdef SWIFT_DEBUG_CHECKS
  if (ci == NULL && cj != NULL)
    error("Added a task with ci==NULL and cj!=NULL type=%s/%s",
          taskID_names[type], subtaskID_names[subtype]);
#endif

739
  /* Get the next free task. */
Pedro Gonnet's avatar
Pedro Gonnet committed
740
  const int ind = atomic_inc(&s->tasks_next);
Matthieu Schaller's avatar
Matthieu Schaller committed
741

742
  /* Overflow? */
743
744
745
746
747
  if (ind >= s->size)
    error(
        "Task list overflow (%d). Need to increase "
        "Scheduler:tasks_per_cell.",
        ind);
748
749

  /* Get a pointer to the new task. */
Pedro Gonnet's avatar
Pedro Gonnet committed
750
  struct task *t = &s->tasks[ind];
751
752
753
754
755

  /* Copy the data. */
  t->type = type;
  t->subtype = subtype;
  t->flags = flags;
756
  t->wait = 0;
757
758
  t->ci = ci;
  t->cj = cj;
759
  t->skip = 1; /* Mark tasks as skip by default. */
760
  t->implicit = implicit;
761
762
763
  t->weight = 0;
  t->rank = 0;
  t->nr_unlock_tasks = 0;
764
#ifdef SWIFT_DEBUG_TASKS
765
  t->rid = -1;
766
767
  t->tic = 0;
  t->toc = 0;
768
#endif
769
770
771
772
773
774
775
776
777

  /* Add an index for it. */
  // lock_lock( &s->lock );
  s->tasks_ind[atomic_inc(&s->nr_tasks)] = ind;
  // lock_unlock_blind( &s->lock );

  /* Return a pointer to the new task. */
  return t;
}
778

779
780
781
782
783
784
785
786
/**
 * @brief Set the unlock pointers in each task.
 *
 * @param s The #scheduler.
 */
void scheduler_set_unlocks(struct scheduler *s) {

  /* Store the counts for each task. */
787
788
  short int *counts;
  if ((counts = (short int *)malloc(sizeof(short int) * s->nr_tasks)) == NULL)
789
    error("Failed to allocate temporary counts array.");
790
791
792
793
794
795
796
797
  bzero(counts, sizeof(short int) * s->nr_tasks);
  for (int k = 0; k < s->nr_unlocks; k++) {
    counts[s->unlock_ind[k]] += 1;

#ifdef SWIFT_DEBUG_CHECKS
    /* Check that we are not overflowing */
    if (counts[s->unlock_ind[k]] < 0)
      error("Task unlocking more than %d other tasks!",
798
799
            (1 << (8 * sizeof(short int) - 1)) - 1);

800
801
#endif
  }
802
803
804
805
806
807

  /* Compute the offset for each unlock block. */
  int *offsets;
  if ((offsets = (int *)malloc(sizeof(int) * (s->nr_tasks + 1))) == NULL)
    error("Failed to allocate temporary offsets array.");
  offsets[0] = 0;
808
809
810
811
812
813
814
815
  for (int k = 0; k < s->nr_tasks; k++) {
    offsets[k + 1] = offsets[k] + counts[k];

#ifdef SWIFT_DEBUG_CHECKS
    /* Check that we are not overflowing */
    if (offsets[k + 1] < 0) error("Task unlock offset array overflowing");
#endif
  }
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842

  /* Create and fill a temporary array with the sorted unlocks. */
  struct task **unlocks;
  if ((unlocks = (struct task **)malloc(sizeof(struct task *) *
                                        s->size_unlocks)) == NULL)
    error("Failed to allocate temporary unlocks array.");
  for (int k = 0; k < s->nr_unlocks; k++) {
    const int ind = s->unlock_ind[k];
    unlocks[offsets[ind]] = s->unlocks[k];
    offsets[ind] += 1;
  }

  /* Swap the unlocks. */
  free(s->unlocks);
  s->unlocks = unlocks;

  /* Re-set the offsets. */
  offsets[0] = 0;
  for (int k = 1; k < s->nr_tasks; k++)
    offsets[k] = offsets[k - 1] + counts[k - 1];

  /* Set the unlocks in the tasks. */
  for (int k = 0; k < s->nr_tasks; k++) {
    struct task *t = &s->tasks[k];
    t->nr_unlock_tasks = counts[k];
    t->unlock_tasks = &s->unlocks[offsets[k]];
  }
843

844
#ifdef SWIFT_DEBUG_CHECKS
845
  /* Verify that there are no duplicate unlocks. */
846
  for (int k = 0