diff --git a/examples/main.c b/examples/main.c index 7dcfee85478235e48351f7b8f9cf973cf5f0fda4..c2d3e3b8b926b72b0e24519da87e4139657b0bfb 100644 --- a/examples/main.c +++ b/examples/main.c @@ -335,6 +335,8 @@ int main(int argc, char *argv[]) { char ICfileName[200] = ""; parser_get_param_string(params, "InitialConditions:file_name", ICfileName); if (myrank == 0) message("Reading ICs from file '%s'", ICfileName); + fflush(stdout); + struct part *parts = NULL; struct gpart *gparts = NULL; size_t Ngas = 0, Ngpart = 0; diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py index 66365bfad2edb38efa4b90c0c1602fd38bd750fd..cf9c96d41fe8f0897acb6be1f9af2269abee6174 100755 --- a/examples/plot_scaling_results.py +++ b/examples/plot_scaling_results.py @@ -17,6 +17,26 @@ import re import numpy as np import matplotlib.pyplot as plt +params = {'axes.labelsize': 14, +'axes.titlesize': 18, +'font.size': 12, +'legend.fontsize': 12, +'xtick.labelsize': 14, +'ytick.labelsize': 14, +'text.usetex': True, +'figure.subplot.left' : 0.055, +'figure.subplot.right' : 0.98 , +'figure.subplot.bottom' : 0.05 , +'figure.subplot.top' : 0.95 , +'figure.subplot.wspace' : 0.14 , +'figure.subplot.hspace' : 0.12 , +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +plt.rcParams.update(params) +plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + version = [] branch = [] revision = [] @@ -25,7 +45,10 @@ hydro_kernel = [] hydro_neighbours = [] hydro_eta = [] threadList = [] -linestyle = ('ro-','bo-','go-','yo-','mo-') +hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', + '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', + '#4477AA'] +linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6]) #cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml' #platform = 'KNL' @@ -106,8 +129,8 @@ def parse_files(): parse_header(file_list[0]) version.append(branch[i] + " " + revision[i] + "\n" + hydro_scheme[i] + - "\n" + hydro_kernel[i] + r", $N_{neigh}$=" + hydro_neighbours[i] + - r", $\eta$=" + hydro_eta[i] + "\n") + "\n" + hydro_kernel[i] + r", $N_{ngb}$=" + hydro_neighbours[i] + + r", $\eta$=" + hydro_eta[i]) times.append([]) totalTime.append([]) speedUp.append([]) @@ -116,7 +139,7 @@ def parse_files(): # Loop over all files for a given series and load the times for j in range(0,len(file_list)): times[i].append([]) - times[i][j].append(np.loadtxt(file_list[j],usecols=(5,))) + times[i][j].append(np.loadtxt(file_list[j],usecols=(5,), skiprows=11)) totalTime[i].append(np.sum(times[i][j])) serialTime.append(totalTime[i][0]) @@ -153,47 +176,59 @@ def print_results(times,totalTime,parallelEff,version): def plot_results(times,totalTime,speedUp,parallelEff): - fig, axarr = plt.subplots(2, 2,figsize=(15,15)) + fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=False) speedUpPlot = axarr[0, 0] parallelEffPlot = axarr[0, 1] totalTimePlot = axarr[1, 0] emptyPlot = axarr[1, 1] # Plot speed up + speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2') for i in range(0,numOfSeries): speedUpPlot.plot(threadList[i],speedUp[i],linestyle[i],label=version[i]) - speedUpPlot.plot(threadList[i],threadList[i],color='k',linestyle='--') - speedUpPlot.set_ylabel("Speed Up") - speedUpPlot.set_xlabel("No. of Threads") + speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.) + speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) + speedUpPlot.set_xlim([0.7,threadList[i][-1]+1]) + speedUpPlot.set_ylim([0.7,threadList[i][-1]+1]) # Plot parallel efficiency + parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2') + parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2') + parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2') + parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2') for i in range(0,numOfSeries): parallelEffPlot.plot(threadList[i],parallelEff[i],linestyle[i]) - + parallelEffPlot.set_xscale('log') - parallelEffPlot.set_ylabel("Parallel Efficiency") - parallelEffPlot.set_xlabel("No. of Threads") + parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.) + parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) parallelEffPlot.set_ylim([0,1.1]) + parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[i][-1]))+0.5)]) # Plot time to solution for i in range(0,numOfSeries): + pts = [1, 10**np.floor(np.log10(threadList[i][-1])+1)] + totalTimePlot.loglog(pts,totalTime[i][0]/pts, 'k--', lw=1., color='0.2') totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i]) totalTimePlot.set_xscale('log') - totalTimePlot.set_xlabel("No. of Threads") - totalTimePlot.set_ylabel("Time to Solution (ms)") + totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.) + totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.) + totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[i][-1]))+0.5)]) + totalTimePlot.set_ylim([10**np.floor(np.log10(np.min(totalTime)*0.6)), 1.2*10**np.floor(np.log10(np.max(totalTime) * 1.5)+1)]) - totalTimePlot.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,prop={'size':14}) + totalTimePlot.legend(bbox_to_anchor=(1.14, 1), loc=2, borderaxespad=0.,prop={'size':12}) emptyPlot.axis('off') for i, txt in enumerate(threadList[0]): - speedUpPlot.annotate(txt, (threadList[0][i],speedUp[0][i])) - parallelEffPlot.annotate(txt, (threadList[0][i],parallelEff[0][i])) - totalTimePlot.annotate(txt, (threadList[0][i],totalTime[0][i])) + if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2 + speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0]) + parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0]) + totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0]) #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(len(times[0][0][0]),cmdLine,platform)) - fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps".format(len(times[0][0][0]))) + fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$"%len(times[0][0][0]), fontsize=16) return @@ -204,4 +239,5 @@ plot_results(times,totalTime,speedUp,parallelEff) print_results(times,totalTime,parallelEff,version) +# And plot plt.show() diff --git a/src/Makefile.am b/src/Makefile.am index ace9b6ff553b25d305060ba2e40affd7bdb277e9..430514528dcc919956b161d0e3879a1db1d8693c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,8 +70,8 @@ nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_h hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h \ hydro/Gizmo/hydro.h hydro/Gizmo/hydro_iact.h hydro/Gizmo/hydro_io.h \ hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \ - riemann.h \ - riemann/riemann_hllc.h riemann/riemann_trrs.h riemann/riemann_exact.h + riemann.h riemann/riemann_hllc.h riemann/riemann_trrs.h \ + riemann/riemann_exact.h riemann/riemann_vacuum.h # Sources and flags for regular library libswiftsim_la_SOURCES = $(AM_SOURCES) diff --git a/src/cell.h b/src/cell.h index a2f7ab6912a6d5a9a788658e48ae160e4be049dc..9f3971dd59c943ebd49a9ac1c812b612ab68bdaf 100644 --- a/src/cell.h +++ b/src/cell.h @@ -128,9 +128,14 @@ struct cell { /* Parent cell. */ struct cell *parent; - /* Super cell, i.e. the highest-level supercell that has interactions. */ + /* Super cell, i.e. the highest-level supercell that has hydro interactions. + */ struct cell *super; + /* Super cell, i.e. the highest-level supercell that has gravity interactions. + */ + struct cell *gsuper; + /* The task computing this cell's sorts. */ struct task *sorts; int sortsize; diff --git a/src/engine.c b/src/engine.c index 2496b3ca85e997a35f8392ac106e60faf9e3766b..64d08d64f952efc4997b62ed8b346d5a8552a259 100644 --- a/src/engine.c +++ b/src/engine.c @@ -118,10 +118,10 @@ void engine_addlink(struct engine *e, struct link **l, struct task *t) { * * @param e The #engine. * @param c The #cell. - * @param super The super #cell. + * @param gsuper The gsuper #cell. */ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, - struct cell *super) { + struct cell *gsuper) { struct scheduler *s = &e->sched; const int is_with_external_gravity = @@ -130,10 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; /* Is this the super-cell? */ - if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { + if (gsuper == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { /* This is the super cell, i.e. the first with gravity tasks attached. */ - super = c; + gsuper = c; /* Local tasks only... */ if (c->nodeID == e->nodeID) { @@ -161,13 +161,13 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, } /* Set the super-cell. */ - c->super = super; + c->gsuper = gsuper; /* Recurse. */ if (c->split) for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) - engine_make_gravity_hierarchical_tasks(e, c->progeny[k], super); + engine_make_gravity_hierarchical_tasks(e, c->progeny[k], gsuper); } /** @@ -1424,11 +1424,11 @@ static inline void engine_make_gravity_dependencies(struct scheduler *sched, struct cell *c) { /* init --> gravity --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); - scheduler_addunlock(sched, gravity, c->super->kick); + scheduler_addunlock(sched, c->gsuper->init, gravity); + scheduler_addunlock(sched, gravity, c->gsuper->kick); /* grav_up --> gravity ( --> kick) */ - scheduler_addunlock(sched, c->super->grav_up, gravity); + scheduler_addunlock(sched, c->gsuper->grav_up, gravity); } /** @@ -1470,10 +1470,10 @@ void engine_link_gravity_tasks(struct engine *e) { /* Gather the multipoles --> mm interaction --> kick */ scheduler_addunlock(sched, gather, t); - scheduler_addunlock(sched, t, t->ci->super->kick); + scheduler_addunlock(sched, t, t->ci->gsuper->kick); /* init --> mm interaction */ - scheduler_addunlock(sched, t->ci->super->init, t); + scheduler_addunlock(sched, t->ci->gsuper->init, t); } /* Self-interaction? */ @@ -1491,7 +1491,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { engine_make_gravity_dependencies(sched, t, t->cj); } @@ -1513,7 +1513,7 @@ void engine_link_gravity_tasks(struct engine *e) { engine_make_gravity_dependencies(sched, t, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->gsuper != t->cj->gsuper) { engine_make_gravity_dependencies(sched, t, t->cj); } diff --git a/src/scheduler.c b/src/scheduler.c index ef4d19107fb48684ca299f286436a155a6fe0151..f121f21d579611b76f2b4438bae560863dae727d 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1104,10 +1104,20 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_pair: case task_type_sub_pair: - qid = t->ci->super->owner; - if (qid < 0 || - s->queues[qid].count > s->queues[t->cj->super->owner].count) - qid = t->cj->super->owner; + if (t->subtype == task_subtype_grav) { + + qid = t->ci->gsuper->owner; + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->gsuper->owner].count) + qid = t->cj->gsuper->owner; + + } else { + + qid = t->ci->super->owner; + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->super->owner].count) + qid = t->cj->super->owner; + } break; case task_type_recv: #ifdef WITH_MPI diff --git a/src/space.c b/src/space.c index a74db31f2f0c212ad09142c01c2879cbdf87beb8..4f034f4c5437fbc77c1003a576bb141e8cf7528d 100644 --- a/src/space.c +++ b/src/space.c @@ -317,6 +317,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { c->count = 0; c->gcount = 0; c->super = c; + c->gsuper = c; c->ti_old = ti_current; lock_init(&c->lock); } @@ -389,6 +390,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].ghost = NULL; s->cells_top[k].kick = NULL; s->cells_top[k].super = &s->cells_top[k]; + s->cells_top[k].gsuper = &s->cells[k]; } s->maxdepth = 0; } @@ -1293,6 +1295,8 @@ void space_split_mapper(void *map_data, int num_elements, void *extra_data) { temp->dx_max = 0.f; temp->nodeID = c->nodeID; temp->parent = c; + temp->super = NULL; + temp->gsuper = NULL; c->progeny[k] = temp; } diff --git a/tests/testInteractions.c b/tests/testInteractions.c index 9d8dca79f3f84ea06e42d61390e68467a5f1b415..52ad0c54258848883a9025bbcd9d68133eddc4b9 100644 --- a/tests/testInteractions.c +++ b/tests/testInteractions.c @@ -173,7 +173,7 @@ void test_interactions(struct part *parts, int count, /* Dump state of particles before serial interaction. */ dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) + for (int i = 1; i < count; i++) dump_indv_particle_fields(serial_filename, &parts[i]); /* Make copy of pi to be used in vectorised version. */ @@ -206,7 +206,7 @@ void test_interactions(struct part *parts, int count, /* Dump result of serial interaction. */ dump_indv_particle_fields(serial_filename, &pi); - for (size_t i = 1; i < count; i++) + for (int i = 1; i < count; i++) dump_indv_particle_fields(serial_filename, &parts[i]); /* Setup arrays for vector interaction. */