diff --git a/src/pmill-768.py b/src/PMillennium768MultiNodeWithVtuneAndAllinea.py
similarity index 58%
rename from src/pmill-768.py
rename to src/PMillennium768MultiNodeWithVtuneAndAllinea.py
index e0f732c42c97974cdcc8949dd324577edb94b9f1..056df19377131181b5e4a140d95b187d7b543960 100644
--- a/src/pmill-768.py
+++ b/src/PMillennium768MultiNodeWithVtuneAndAllinea.py
@@ -3,18 +3,19 @@
 import reframe.core.launchers.mpi
 import common
 
-
-@rfm.parameterized_test(*([tasks, threads, iteration, launcher]
-                          for tasks in [2]
-                          for threads in list(reversed([56]))
+# 4 ranks and 64 threads were found to be an optimal way to use a
+# node on DINE for the pmill 768 benchmark
+@rfm.parameterized_test(*([tasks, tasks_per_node, threads, iteration, launcher]
+                          for tasks in [16]
+                          for tasks_per_node in [4]
+                          for threads in list(reversed([64]))
                           for iteration in [1]
-                          for launcher in ['mpirun','perf-report']))
-class PMillenniumTest(rfm.RegressionTest):
-    def __init__(self, num_tasks, num_threads, iteration, launcher):
-        ic_dir = common.setup(self, launcher, num_tasks, num_tasks_per_node=num_tasks)
-
+                          for launcher in ['profiler:Vtune','profiler:Allinea']))
+class PMillennium768MultiNode(rfm.RegressionTest):
+    def __init__(self, num_tasks, tasks_per_node, num_threads, iteration, launcher):
+        ic_dir = common.setup(self, launcher, num_tasks, tasks_per_node)
+
         test_dir = 'swiftsim/examples/PMillennium/PMillennium-768'
-        self.cpus_per_task = 32 # added
 
         print(f'Running with threads : <{num_threads}>')
         self.keep_files = [test_dir]
@@ -24,23 +25,22 @@ class PMillenniumTest(rfm.RegressionTest):
             '--with-parmetis'
         ]
         self.prerun_cmds = [f'pushd {test_dir}']
-        num_iterations = 5
-
+        num_steps = 10
+        self.time_limit = '3h'
 
-        if launcher == 'mpirun':
+        if launcher == 'profiler:Vtune':
             self.executable = 'aps'
             self.executable_opts = ['--collection-mode=mpi,omp', '../../swift_mpi']
-        elif launcher == 'perf-report':
+        elif launcher == 'profiler:Allinea':
             self.executable = 'perf-report'
             self.executable_opts = ['--mpi=intel-mpi', '../../swift_mpi']
-            self.time_limit = '2h40m'
 
         self.executable_opts += [
            '--cosmology',
            '--self-gravity',
            '-v', '1',
            f'--threads={num_threads}',
-           '-n', f'{num_iterations}',
+           '-n', f'{num_steps}',
            '-P', 'Restarts:enable:0',
            f'-PInitialConditions:file_name:{ic_dir}/pmillenium/PMill-768.hdf5',
            'p-mill-768.yml'
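
A minimal sketch (editorial, not part of the patch) of what the decorator's generator
expression above expands to: @rfm.parameterized_test instantiates the test once per
argument list, so this parameterization yields exactly two instances, one per profiler.

    # Plain-Python expansion of the generator expression in @rfm.parameterized_test.
    params = [[tasks, tasks_per_node, threads, iteration, launcher]
              for tasks in [16]
              for tasks_per_node in [4]
              for threads in list(reversed([64]))
              for iteration in [1]
              for launcher in ['profiler:Vtune', 'profiler:Allinea']]
    print(params)
    # [[16, 4, 64, 1, 'profiler:Vtune'], [16, 4, 64, 1, 'profiler:Allinea']]
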
diff --git a/src/common.py b/src/common.py
index 2ccd53e8f39170044a21a554ae87a7807618c20e..9bd875a3ff5f6026cbefd6eca4cd335dc0540c0f 100644
--- a/src/common.py
+++ b/src/common.py
@@ -2,11 +2,10 @@
 import reframe as rfm
 import reframe.utility.sanity as sn
 
-
 def setup(test, launcher, num_tasks, num_tasks_per_node=1):
     test.time_limit = '1h'
-    test.num_tasks = 1 # num_tasks
-    test.num_tasks_per_node = 1
+    test.num_tasks = num_tasks
+    test.num_tasks_per_node = num_tasks_per_node
 
     test.valid_prog_environs = ['*']
 
@@ -28,26 +27,30 @@ def setup(test, launcher, num_tasks, num_tasks_per_node=1):
 
     if rfm.utility.osext.osuser() == 'dc-turn5':
         if test.current_system.name == 'cosma7':
             ic_dir = '/cosma7/data/ds007/dc-turn5/swift_initial_conditions'
-            if launcher == 'mpirun':
+            if launcher == 'profiler:Vtune':
                 test.valid_systems = ['cosma7:cpu_multi_node_aps']
-            elif launcher == 'perf-report':
+            elif launcher == 'profiler:Allinea':
                 test.valid_systems = ['cosma7:cpu_multi_node_perf_report']
         elif test.current_system.name == 'dine':
             ic_dir = '/cosma5/data/durham/dc-turn5/swift_initial_conditions'
-            if launcher == 'mpirun':
+            if launcher == 'profiler:Vtune':
                 test.valid_systems = ['dine:cpu_multi_node_aps']
-            elif launcher == 'perf-report':
+            elif launcher == 'profiler:Allinea':
                 test.valid_systems = ['dine:cpu_multi_node_perf_report']
-            elif launcher == 'scalasca':
+            elif launcher == 'profiler:ScoreP':
+                # ScoreP only works with the non-MPI version of SWIFT
+                # because MPI_THREAD_MULTIPLE (needed by SWIFT) is not supported
                 test.valid_systems = ['dine:cpu_single_node']
         else:
             raise ValueError(f'Need to handle {test.current_system.name} for {rfm.utility.osext.osuser()}')
 
-    elif rfm.utility.osext.osuser() == 'dc-fraw1':
-        if test.current_system.name == 'cosma7' or test.current_system.name == 'dine':
-            ic_dir = '/cosma5/data/do008/dc-fraw1/swift_initial_conditions'
-        else:
-            raise ValueError(f'Need to handle {test.current_system.name} for {rfm.utility.osext.osuser()}')
+    # TODO: ADD FILE PATHS
+    # elif rfm.utility.osext.osuser() == 'dc-fraw1':
+    #     if test.current_system.name == 'cosma7' or test.current_system.name == 'dine':
+    #         ic_dir = '/cosma5/data/do008/dc-fraw1/swift_initial_conditions'
+    #     else:
+    #         raise ValueError(f'Need to handle {test.current_system.name} for {rfm.utility.osext.osuser()}')
     else:
         raise ValueError(f'Need to handle {test.current_system.name} for {rfm.utility.osext.osuser()}')
+
+    return ic_dir
diff --git a/src/eagle_6_scorep.py b/src/eagle6SingleNodeWithScoreP.py
similarity index 100%
rename from src/eagle_6_scorep.py
rename to src/eagle6SingleNodeWithScoreP.py
diff --git a/src/sodshock_3d.py b/src/sodshock3dMultiNodeWithVtuneAndAllinea.py
similarity index 59%
rename from src/sodshock_3d.py
rename to src/sodshock3dMultiNodeWithVtuneAndAllinea.py
index 73fd08972cf81165f7f3466cbb1b16b5d7123112..ab9564a0f0fb7d2b9b1553b397475ee219816ab3 100644
--- a/src/sodshock_3d.py
+++ b/src/sodshock3dMultiNodeWithVtuneAndAllinea.py
@@ -3,18 +3,19 @@
 import reframe.core.launchers.mpi
 import common
 
-
-@rfm.parameterized_test(*([tasks, threads, iteration, launcher]
-                          for tasks in [2]
-                          for threads in list(reversed([32]))
+# 4 ranks and 64 threads were found to be an optimal way to use a
+# node on DINE (settings carried over from the pmill 768 benchmark)
+@rfm.parameterized_test(*([tasks, tasks_per_node, threads, iteration, launcher]
+                          for tasks in [16]
+                          for tasks_per_node in [4]
+                          for threads in list(reversed([64]))
                           for iteration in [1]
-                          for launcher in ['mpirun', 'perf-report', 'scalasca']))
-class SodShock3dTest(rfm.RegressionTest):
-    def __init__(self, num_tasks, num_threads, iteration, launcher):
-        ic_dir = common.setup(self, launcher, num_tasks, num_tasks_per_node=num_tasks)
+                          for launcher in ['profiler:Vtune', 'profiler:Allinea']))
+class SodShock3dTestMultiNode(rfm.RegressionTest):
+    def __init__(self, num_tasks, tasks_per_node, num_threads, iteration, launcher):
+        ic_dir = common.setup(self, launcher, num_tasks, tasks_per_node)
 
         test_dir = 'swiftsim/examples/HydroTests/SodShock_3D'
-        self.cpus_per_task = 32 # added
 
         print(f'Running with threads : <{num_threads}>')
         self.keep_files = [test_dir]
@@ -26,18 +27,14 @@ class SodShock3dTest(rfm.RegressionTest):
         self.prerun_cmds = [f'pushd {test_dir}']
         num_iterations = 10000
 
-        if launcher == 'mpirun':
-            self.executable = 'aps'
+        if launcher == 'profiler:Vtune':
+            self.executable = 'aps'
             self.executable_opts = ['--collection-mode=mpi,omp', '../../swift_mpi',
                                     '-P', 'Snapshots:time_first:999999999']
-        elif launcher == 'perf-report':
+        elif launcher == 'profiler:Allinea':
             self.executable = 'perf-report'
             self.executable_opts = ['--mpi=intel-mpi', '../../swift_mpi']
             self.time_limit = '2h40m'
-        elif launcher == 'scalasca':
-            self.build_system.make_opts = ['CC="scorep --user --thread=pthread icc"']
-            self.executable = '../../swift'
-            self.executable_opts = ['-P', 'Snapshots:time_first:999999999']
 
         self.executable_opts += [
             '--hydro',
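
The launcher parameter in these tests is a plain string that common.setup() above maps
to a ReFrame partition through its if/elif chain. A standalone sketch of that mapping
(pick_partition is a hypothetical helper for illustration, not part of common.py):

    def pick_partition(system_name, launcher):
        # Mirrors the launcher dispatch in common.setup(): each profiler string
        # selects the partition configured for that tool on the given system.
        partition_by_launcher = {
            'profiler:Vtune': 'cpu_multi_node_aps',
            'profiler:Allinea': 'cpu_multi_node_perf_report',
            'profiler:ScoreP': 'cpu_single_node',  # non-MPI build only
        }
        return [f'{system_name}:{partition_by_launcher[launcher]}']

    assert pick_partition('dine', 'profiler:Vtune') == ['dine:cpu_multi_node_aps']
    assert pick_partition('dine', 'profiler:ScoreP') == ['dine:cpu_single_node']
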
src/sodshock3dSingleNodeWithScoreP.py
index df4d1e14c92fe582f082da780cff6c51a0fc3fe0..21d62b2ec45cdaffb55610eed9e4a2a35e3e1121 100644
--- a/src/sodshock_scalasca.py
+++ b/src/sodshock3dSingleNodeWithScoreP.py
@@ -3,42 +3,38 @@
 import reframe.core.launchers.mpi
 import common
 
-
+# ScoreP is only compatible with SWIFT if we build without MPI
 @rfm.parameterized_test(*([tasks, threads, iteration, launcher]
-                          for tasks in [2]
-                          for threads in list(reversed([32]))
+                          for tasks in [1]
+                          for threads in list(reversed([64]))
                           for iteration in [1]
-                          for launcher in ['scalasca']))
-class SodShock3dTest(rfm.RegressionTest):
+                          for launcher in ['profiler:ScoreP']))
+class SodShock3dTestSingleNode(rfm.RegressionTest):
     def __init__(self, num_tasks, num_threads, iteration, launcher):
         ic_dir = common.setup(self, launcher, num_tasks, num_tasks_per_node=num_tasks)
 
         test_dir = 'swiftsim/examples/HydroTests/SodShock_3D'
-        # self.cpus_per_task = 32 # added
 
         print(f'Running with threads : <{num_threads}>')
         self.keep_files = [test_dir]
 
         self.build_system.config_opts = [
-            '--disable-ipo', #temporary disable - support is forthcoming
+            '--disable-ipo', # temporarily disabled - ScoreP support is forthcoming
             '--with-tbbmalloc',
             '--with-parmetis'
         ]
         self.prerun_cmds = [f'pushd {test_dir}']
-        num_iterations = 1
+        num_steps = 10000
 
-        # self.build_system.options = ['CC=scorep mpicc', 'CFLAGS=-fopenmp'] #Added for scalasca
-        #self.build_system.make_opts = ['CC="scorep --user --thread=pthread mpicc"'] #change mpicc to icc for non-mpi
         self.build_system.make_opts = ['CC="scorep --user --thread=pthread icc"']
-        if launcher == 'scalasca':
-            self.executable = '../../swift'
+        self.executable = '../../swift'
 
         self.executable_opts += [
             '-P', 'Snapshots:time_first:999999999',
             '--hydro',
             '-v', '1',
             f'--threads={num_threads}',
-            '-n', f'{num_iterations}',
+            '-n', f'{num_steps}',
             '-P', 'Restarts:enable:0',
             '-P', f'InitialConditions:file_name:{ic_dir}/sodshock/glassCube_64.hdf5',
             'sodShock.yml'
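
For reference, a sketch of the command line the single-node ScoreP test assembles from
self.executable and self.executable_opts (ic_dir is the DINE path returned by
common.setup(); the other values follow the parameterization above):

    ic_dir = '/cosma5/data/durham/dc-turn5/swift_initial_conditions'
    num_threads, num_steps = 64, 10000
    opts = ['-P', 'Snapshots:time_first:999999999',
            '--hydro',
            '-v', '1',
            f'--threads={num_threads}',
            '-n', f'{num_steps}',
            '-P', 'Restarts:enable:0',
            '-P', f'InitialConditions:file_name:{ic_dir}/sodshock/glassCube_64.hdf5',
            'sodShock.yml']
    # No mpirun wrapper here: Score-P instrumentation is applied at build time
    # through make_opts (CC="scorep --user --thread=pthread icc"), and the test
    # runs the non-MPI binary on a single node.
    print(' '.join(['../../swift'] + opts))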