diff --git a/src/cell.c b/src/cell.c index e2767cdaa9e1189ec87b5ef51cc578c91f8cfe4c..db39d9d45d8cbe3f76a3dbc105ab23d4e8954e08 100644 --- a/src/cell.c +++ b/src/cell.c @@ -468,7 +468,8 @@ void cell_gunlocktree(struct cell *c) { * @param buff A buffer with at least max(c->count, c->gcount) entries, * used for sorting indices. */ -void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { +void cell_split(struct cell *c, ptrdiff_t parts_offset, struct cell_buff *buff, + struct cell_buff *gbuff) { const int count = c->count, gcount = c->gcount; struct part *parts = c->parts; @@ -480,18 +481,22 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { int bucket_count[8] = {0, 0, 0, 0, 0, 0, 0, 0}; int bucket_offset[9]; - /* If the buff is NULL, allocate it, and remember to free it. */ - const int allocate_buffer = (buff == NULL); - if (allocate_buffer && - (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL) - error("Failed to allocate temporary indices."); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the buffs are OK. */ + for (int k = 0; k < count; k++) { + if (buff[k].x[0] != (float)parts[k].x[0] || + buff[k].x[1] != (float)parts[k].x[1] || + buff[k].x[2] != (float)parts[k].x[2]) + error("Inconsistent buff contents."); + } +#endif /* SWIFT_DEBUG_CHECKS */ /* Fill the buffer with the indices. */ for (int k = 0; k < count; k++) { - const int bid = (parts[k].x[0] > pivot[0]) * 4 + - (parts[k].x[1] > pivot[1]) * 2 + (parts[k].x[2] > pivot[2]); + const int bid = (buff[k].x[0] > pivot[0]) * 4 + + (buff[k].x[1] > pivot[1]) * 2 + (buff[k].x[2] > pivot[2]); bucket_count[bid]++; - buff[k] = bid; + buff[k].ind = bid; } /* Set the buffer offsets. */ @@ -505,23 +510,25 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { for (int bucket = 0; bucket < 8; bucket++) { for (int k = bucket_offset[bucket] + bucket_count[bucket]; k < bucket_offset[bucket + 1]; k++) { - int bid = buff[k]; + int bid = buff[k].ind; if (bid != bucket) { struct part part = parts[k]; struct xpart xpart = xparts[k]; + struct cell_buff temp_buff = buff[k]; while (bid != bucket) { int j = bucket_offset[bid] + bucket_count[bid]++; - while (buff[j] == bid) { + while (buff[j].ind == bid) { j++; bucket_count[bid]++; } memswap(&parts[j], &part, sizeof(struct part)); memswap(&xparts[j], &xpart, sizeof(struct xpart)); - memswap(&buff[j], &bid, sizeof(int)); + memswap(&buff[j], &temp_buff, sizeof(struct cell_buff)); + bid = temp_buff.ind; } parts[k] = part; xparts[k] = xpart; - buff[k] = bid; + buff[k] = temp_buff; } bucket_count[bid]++; } @@ -538,6 +545,14 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { if (count > 0 && gcount > 0) part_relink_gparts(parts, count, parts_offset); #ifdef SWIFT_DEBUG_CHECKS + /* Check that the buffs are OK. */ + for (int k = 0; k < count; k++) { + if (buff[k].x[0] != (float)parts[k].x[0] || + buff[k].x[1] != (float)parts[k].x[1] || + buff[k].x[2] != (float)parts[k].x[2]) + error("Inconsistent buff contents (k=%i).", k); + } + /* Verify that _all_ the parts have been assigned to a cell. */ for (int k = 1; k < 8; k++) if (&c->progeny[k - 1]->parts[c->progeny[k - 1]->count] != @@ -564,6 +579,31 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { c->progeny[2]->parts[k].x[1] <= pivot[1] || c->progeny[2]->parts[k].x[2] > pivot[2]) error("Sorting failed (progeny=2)."); + for (int k = 0; k < c->progeny[3]->count; k++) + if (c->progeny[3]->parts[k].x[0] > pivot[0] || + c->progeny[3]->parts[k].x[1] <= pivot[1] || + c->progeny[3]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=3)."); + for (int k = 0; k < c->progeny[4]->count; k++) + if (c->progeny[4]->parts[k].x[0] <= pivot[0] || + c->progeny[4]->parts[k].x[1] > pivot[1] || + c->progeny[4]->parts[k].x[2] > pivot[2]) + error("Sorting failed (progeny=4)."); + for (int k = 0; k < c->progeny[5]->count; k++) + if (c->progeny[5]->parts[k].x[0] <= pivot[0] || + c->progeny[5]->parts[k].x[1] > pivot[1] || + c->progeny[5]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=5)."); + for (int k = 0; k < c->progeny[6]->count; k++) + if (c->progeny[6]->parts[k].x[0] <= pivot[0] || + c->progeny[6]->parts[k].x[1] <= pivot[1] || + c->progeny[6]->parts[k].x[2] > pivot[2]) + error("Sorting failed (progeny=6)."); + for (int k = 0; k < c->progeny[7]->count; k++) + if (c->progeny[7]->parts[k].x[0] <= pivot[0] || + c->progeny[7]->parts[k].x[1] <= pivot[1] || + c->progeny[7]->parts[k].x[2] <= pivot[2]) + error("Sorting failed (progeny=7)."); #endif /* Now do the same song and dance for the gparts. */ @@ -571,11 +611,10 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { /* Fill the buffer with the indices. */ for (int k = 0; k < gcount; k++) { - const int bid = (gparts[k].x[0] > pivot[0]) * 4 + - (gparts[k].x[1] > pivot[1]) * 2 + - (gparts[k].x[2] > pivot[2]); + const int bid = (gbuff[k].x[0] > pivot[0]) * 4 + + (gbuff[k].x[1] > pivot[1]) * 2 + (gbuff[k].x[2] > pivot[2]); bucket_count[bid]++; - buff[k] = bid; + gbuff[k].ind = bid; } /* Set the buffer offsets. */ @@ -589,20 +628,22 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff) { for (int bucket = 0; bucket < 8; bucket++) { for (int k = bucket_offset[bucket] + bucket_count[bucket]; k < bucket_offset[bucket + 1]; k++) { - int bid = buff[k]; + int bid = gbuff[k].ind; if (bid != bucket) { struct gpart gpart = gparts[k]; + struct cell_buff temp_buff = gbuff[k]; while (bid != bucket) { int j = bucket_offset[bid] + bucket_count[bid]++; - while (buff[j] == bid) { + while (gbuff[j].ind == bid) { j++; bucket_count[bid]++; } memswap(&gparts[j], &gpart, sizeof(struct gpart)); - memswap(&buff[j], &bid, sizeof(int)); + memswap(&gbuff[j], &temp_buff, sizeof(struct cell_buff)); + bid = temp_buff.ind; } gparts[k] = gpart; - buff[k] = bid; + gbuff[k] = temp_buff; } bucket_count[bid]++; } diff --git a/src/cell.h b/src/cell.h index 2cd13cf2ab6b934f6aab84bcbacf510270892866..b223c839a03dac12829e853bad47deda1eb3d1f7 100644 --- a/src/cell.h +++ b/src/cell.h @@ -52,6 +52,12 @@ struct scheduler; /* Global variables. */ extern int cell_next_tag; +/* Struct to temporarily buffer the particle locations and bin id. */ +struct cell_buff { + double x[3]; + int ind; +}; + /* Mini struct to link cells to tasks. Used as a linked list. */ struct link { @@ -272,7 +278,8 @@ struct cell { ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) /* Function prototypes. */ -void cell_split(struct cell *c, ptrdiff_t parts_offset, int *buff); +void cell_split(struct cell *c, ptrdiff_t parts_offset, struct cell_buff *buff, + struct cell_buff *gbuff); void cell_sanitize(struct cell *c); int cell_locktree(struct cell *c); void cell_unlocktree(struct cell *c); diff --git a/src/debug.c b/src/debug.c index 48572df7f046944613d2598b0d340e949ad3ab7e..21f539b62eef3b0b36f52520486f1e725abc0cda 100644 --- a/src/debug.c +++ b/src/debug.c @@ -227,21 +227,21 @@ int checkCellhdxmax(const struct cell *c, int *depth) { if (c->h_max != h_max) { message("%d Inconsistent h_max: cell %f != parts %f", *depth, c->h_max, h_max); - message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); + error("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } if (c->dx_max != dx_max) { message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max, dx_max); - message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); + error("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } /* Check rebuild criterion. */ - if (h_max > c->dmin) { + /* if (h_max > c->dmin) { message("%d Inconsistent c->dmin: %f > %f", *depth, h_max, c->dmin); - message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); + error("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; - } + } */ return result; } diff --git a/src/space.c b/src/space.c index 6e6a0768ff6a3a2982fd23edd84d61ac9afd5515..28f7cac0b425ca26e036332221388ccca76583da 100644 --- a/src/space.c +++ b/src/space.c @@ -1456,9 +1456,12 @@ void space_map_cells_pre(struct space *s, int full, * @param s The #space in which the cell lives. * @param c The #cell to split recursively. * @param buff A buffer for particle sorting, should be of size at least - * max(c->count, c->gount) or @c NULL. + * c->count or @c NULL. + * @param gbuff A buffer for particle sorting, should be of size at least + * c->gcount or @c NULL. */ -void space_split_recursive(struct space *s, struct cell *c, int *buff) { +void space_split_recursive(struct space *s, struct cell *c, + struct cell_buff *buff, struct cell_buff *gbuff) { const int count = c->count; const int gcount = c->gcount; @@ -1473,10 +1476,29 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { struct engine *e = s->e; /* If the buff is NULL, allocate it, and remember to free it. */ - const int allocate_buffer = (buff == NULL); - if (allocate_buffer && - (buff = (int *)malloc(sizeof(int) * max(count, gcount))) == NULL) - error("Failed to allocate temporary indices."); + const int allocate_buffer = (buff == NULL && gbuff == NULL); + if (allocate_buffer) { + if (count > 0) { + if ((buff = (struct cell_buff *)malloc(sizeof(struct cell_buff) * + count)) == NULL) + error("Failed to allocate temporary indices."); + for (int k = 0; k < count; k++) { + buff[k].x[0] = parts[k].x[0]; + buff[k].x[1] = parts[k].x[1]; + buff[k].x[2] = parts[k].x[2]; + } + } + if (gcount > 0) { + if ((gbuff = (struct cell_buff *)malloc(sizeof(struct cell_buff) * + gcount)) == NULL) + error("Failed to allocate temporary indices."); + for (int k = 0; k < gcount; k++) { + gbuff[k].x[0] = gparts[k].x[0]; + gbuff[k].x[1] = gparts[k].x[1]; + gbuff[k].x[2] = gparts[k].x[2]; + } + } + } /* Check the depth. */ while (depth > (maxdepth = s->maxdepth)) { @@ -1522,15 +1544,18 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { } /* Split the cell data. */ - cell_split(c, c->parts - s->parts, buff); + cell_split(c, c->parts - s->parts, buff, gbuff); /* Remove any progeny with zero parts. */ + struct cell_buff *progeny_buff = buff, *progeny_gbuff = gbuff; for (int k = 0; k < 8; k++) if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) { space_recycle(s, c->progeny[k]); c->progeny[k] = NULL; } else { - space_split_recursive(s, c->progeny[k], buff); + space_split_recursive(s, c->progeny[k], progeny_buff, progeny_gbuff); + progeny_buff += c->progeny[k]->count; + progeny_gbuff += c->progeny[k]->gcount; h_max = max(h_max, c->progeny[k]->h_max); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); @@ -1589,7 +1614,10 @@ void space_split_recursive(struct space *s, struct cell *c, int *buff) { c->owner = 0; /* Ok, there is really nothing on this rank... */ /* Clean up. */ - if (allocate_buffer) free(buff); + if (allocate_buffer) { + if (buff != NULL) free(buff); + if (gbuff != NULL) free(gbuff); + } } /** @@ -1608,7 +1636,7 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { for (int ind = 0; ind < num_cells; ind++) { struct cell *c = &cells_top[ind]; - space_split_recursive(s, c, NULL); + space_split_recursive(s, c, NULL, NULL); } #ifdef SWIFT_DEBUG_CHECKS @@ -1616,7 +1644,7 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { for (int ind = 0; ind < num_cells; ind++) { int depth = 0; if (!checkCellhdxmax(&cells_top[ind], &depth)) - message(" at cell depth %d", depth); + error(" at cell depth %d", depth); } #endif } diff --git a/src/threadpool.c b/src/threadpool.c index 35e5f2139de0689d9761d0d8f19030a076329cba..c11fd8121bb02f36fce1796d79a7eb55a38102c4 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -90,7 +90,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { /* Initialize the thread counters. */ tp->num_threads = num_threads; tp->num_threads_waiting = 0; - + /* If there is only a single thread, do nothing more as of here as we will just do work in the (blocked) calling thread. */ if (num_threads == 1) return; @@ -147,7 +147,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { void threadpool_map(struct threadpool *tp, threadpool_map_function map_function, void *map_data, size_t N, int stride, int chunk, void *extra_data) { - + /* If we just have a single thread, call the map function directly. */ if (tp->num_threads == 1) { map_function(map_data, N, extra_data);