From a23ffff1ab6444c2e83024bca0c0771c0fce4f13 Mon Sep 17 00:00:00 2001 From: Jonathan Frawley <jonathan.frawley@durham.ac.uk> Date: Thu, 21 Jan 2021 16:18:21 +0000 Subject: [PATCH] Working with two nodes with John Helly's fix - export I_MPI_FABRICS=shm:tcp required --- benchmark-fast/job-aps.sh | 6 +- benchmark-fast/job-arm.sh | 6 +- benchmark-slow/build_debug.sh | 16 +++++ benchmark-slow/job-arm.sh | 3 +- ...2.html => aps_report_20210121_161701.html} | 2 +- .../swift_mpi_1p_1n_2021-01-21_15-42.txt | 64 ----------------- ... => swift_mpi_2p_2n_2021-01-21_16-09.html} | 70 +++++++++---------- .../swift_mpi_2p_2n_2021-01-21_16-09.txt | 65 +++++++++++++++++ 8 files changed, 125 insertions(+), 107 deletions(-) create mode 100644 benchmark-slow/build_debug.sh rename report/day1/benchmark-fast/{aps_report_20210121_154732.html => aps_report_20210121_161701.html} (94%) delete mode 100644 report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.txt rename report/day1/benchmark-fast/{swift_mpi_1p_1n_2021-01-21_15-42.html => swift_mpi_2p_2n_2021-01-21_16-09.html} (99%) create mode 100644 report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.txt diff --git a/benchmark-fast/job-aps.sh b/benchmark-fast/job-aps.sh index 1ab7214..a9e123f 100644 --- a/benchmark-fast/job-aps.sh +++ b/benchmark-fast/job-aps.sh @@ -1,6 +1,6 @@ #!/bin/bash #SBATCH --job-name="swiftaps" -#SBATCH --ntasks=1 +#SBATCH --ntasks=2 #SBATCH --ntasks-per-node=1 #SBATCH --output=swiftaps.out #SBATCH --error=swiftaps.err @@ -24,6 +24,6 @@ pushd swiftsim/examples/HydroTests/SodShock_3D export MPS_STAT_LEVEL=2 export APS_ARGS="--collection-mode=mpi,omp" -export I_MPI_HYDRA_DEBUG=1 +export I_MPI_FABRICS=shm:tcp -mpirun -np $SLURM_NTASKS -iface p1p2 aps $APS_ARGS ../../swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml +mpirun -np $SLURM_NTASKS -iface p1p2 aps $APS_ARGS ../../swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Snapshots:time_first:999999999 sodShock.yml diff --git a/benchmark-fast/job-arm.sh b/benchmark-fast/job-arm.sh index f1ea867..f0b5e7c 100644 --- a/benchmark-fast/job-arm.sh +++ b/benchmark-fast/job-arm.sh @@ -1,6 +1,6 @@ #!/bin/bash #SBATCH --job-name="swiftarm" -#SBATCH --ntasks=1 +#SBATCH --ntasks=2 #SBATCH --ntasks-per-node=1 #SBATCH --output=swiftarm.out #SBATCH --error=swiftarm.err @@ -19,9 +19,11 @@ module load fftw/3.3.8epyc module load python/3.6.5 module load allinea/ddt/20.2.1 -pushd swiftsim/examples/HydroTests/SodShock_3D +pushd swiftsim_debug/examples/HydroTests/SodShock_3D export MPS_STAT_LEVEL=2 export APS_ARGS="--collection-mode=mpi,omp" +export I_MPI_FABRICS=shm:tcp + perf-report --mpi=intel-mpi -np $SLURM_NTASKS ../../swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml diff --git a/benchmark-slow/build_debug.sh b/benchmark-slow/build_debug.sh new file mode 100644 index 0000000..1ffc149 --- /dev/null +++ b/benchmark-slow/build_debug.sh @@ -0,0 +1,16 @@ +module load intel_comp/2018 +module load intel_mpi/2018 +module load parmetis/4.0.3 +module load parallel_hdf5/1.10.3 +module load gsl/2.4 +module load fftw/3.3.8epyc +module load python/3.6.5 +rm -rf swiftsim_debug +cp -rf ../../swiftsim ./swiftsim_debug +pushd swiftsim_debug +git checkout bd36987f24e03ef35a52d40288dad77fe374dfc4 +git clean -x -f -d +./autogen.sh +cd ./ +./configure CC="icc" CXX="icpc" --enable-ipo --with-tbbmalloc --with-parmetis --enable-debug=yes +make -j 32 diff --git a/benchmark-slow/job-arm.sh b/benchmark-slow/job-arm.sh index 8f529cd..db2a892 100644 --- a/benchmark-slow/job-arm.sh +++ b/benchmark-slow/job-arm.sh @@ -18,7 +18,6 @@ module load gsl/2.4 module load fftw/3.3.8epyc module load python/3.6.5 module load allinea/ddt/20.2.1 -pushd swiftsim/examples/PMillennium/PMillennium-768 +pushd swiftsim_debug/examples/PMillennium/PMillennium-768 perf-report --mpi=intel-mpi -np $SLURM_NTASKS ../../swift_mpi --cosmology --self-gravity -v 1 --threads=64 -n 1 -P Restarts:enable:0 -PInitialConditions:file_name:/cosma5/data/do008/dc-fraw1/swift_initial_conditions/pmillenium/PMill-768.hdf5 p-mill-768.yml - diff --git a/report/day1/benchmark-fast/aps_report_20210121_154732.html b/report/day1/benchmark-fast/aps_report_20210121_161701.html similarity index 94% rename from report/day1/benchmark-fast/aps_report_20210121_154732.html rename to report/day1/benchmark-fast/aps_report_20210121_161701.html index 9cd0fc4..4ef7f6c 100644 --- a/report/day1/benchmark-fast/aps_report_20210121_154732.html +++ b/report/day1/benchmark-fast/aps_report_20210121_161701.html @@ -1 +1 @@ -<!DOCTYPE html><html lang="en"><head><title>APS report</title><link rel="icon" href="favicon.ico" type="image/x-icon"><meta name="nexus-supported-models" content="mps_plugin.Result"><link rel="nexus-home" href="?nexus={nexus}"><link rel="nexus-app" href="?nexus={nexus}&path={properties.path}"><meta name="description" content="APS viewer application"><style type="text/css">body,html{height:100%}body{margin:0;font-family:'Segoe UI',Tahoma,Geneva,Verdana,sans-serif;font-size:15px;color:#000;background-color:#f7f7f7;overflow:hidden}@media (min-width:1320px){#content{width:1190px;font-size:16.5px}}@media (min-width:1540px){#content{width:1410px;font-size:18px}}@media (min-width:1760px){#content{width:1630px;font-size:19.5px}}.ablate{display:none}#leftMenuOpenedState,#topMenuOpenedState{background-color:#00aeef;padding:.5em;color:#fff}#leftMenuOpenedState>h4,#topMenuOpenedState>h4{font-weight:700;margin-bottom:.8em}#leftMenuOpenedState>div,#leftMenuOpenedState>span,#topMenuOpenedState>div,#topMenuOpenedState>span{opacity:.75;padding-bottom:.8em;font-style:italic}#leftMenuOpenedState>a,#leftMenuOpenedState>a:visited,#topMenuOpenedState>a,#topMenuOpenedState>a:visited{font-weight:400;color:#fff;opacity:.9}#leftMenuOpenedState>a:hover,#topMenuOpenedState>a:hover{opacity:1}#topMenuOpenedState>a{padding-top:.6em;padding-left:4em}#leftMenuClosedState,#topMenuClosedState{background-color:#0071c5}#leftMenuClosedState:hover,#topMenuClosedState:hover{cursor:pointer;background-color:#00aeef}#topMenuClosedState{font-size:22.5px;line-height:60px;color:#fff;text-overflow:ellipsis;white-space:nowrap;overflow:hidden}#topMenuClosedState sup{display:none;font-size:.4em;font-weight:400;line-height:.8em}#topMenuClosedState span{padding-left:13px;padding-right:13px}#topMenuClosedState span.icon.menu:after{vertical-align:middle}#topMenuClosedState span.icon.intel{line-height:0;float:right;font-size:2em;padding-top:13px}@media (min-width:625px){#topMenuClosedState sup{display:inline}}#leftMenuClosedState div.icon{font-size:2.5em;width:1em;margin:15px auto}#leftMenuClosedState div.icon.intel{font-size:4em;position:absolute;margin:0;bottom:0;left:15px}#leftMenuClosedState,#leftMenuOpenedState{z-index:5;position:fixed}#topMenuClosedState,#topMenuOpenedState{z-index:5}#leftMenuClosedState,#leftMenuOpenedState,.feedback-panel.closed a.button{display:none}#topMenuOpenedState{display:block;position:fixed;left:0;right:0;top:-200px;bottom:0;width:100%;height:200px}#topMenuClosedState{display:block;position:absolute;left:0;right:0;top:0;height:60px}#contentWrap{position:fixed;overflow-y:auto;overflow-x:hidden;left:0;right:0;top:0;bottom:0;padding:20px;margin-top:60px}.menuOpened #topMenuOpenedState{top:0}.menuOpened #topMenuClosedState{top:-60px}.menuOpened #contentWrap{opacity:.35;margin-top:200px}@media (min-width:850px){#topMenuClosedState,#topMenuOpenedState{display:none}#leftMenuClosedState,#leftMenuOpenedState{display:block;left:-180px;top:0;bottom:0;width:180px}#leftMenuClosedState{left:0;width:90px}#contentWrap{left:0;top:0;padding-left:110px;margin-top:0}.menuOpened #leftMenuOpenedState{left:0}.menuOpened #leftMenuClosedState{left:-90px}.menuOpened #contentWrap{left:135px;margin-top:0}}.anim{-webkit-transition:.5s all cubic-bezier(.8,0,.2,1);-moz-transition:.5s all cubic-bezier(.8,0,.2,1);-o-transition:.5s all cubic-bezier(.8,0,.2,1);transition:.5s all cubic-bezier(.8,0,.2,1)}@media print{#leftMenuClosedState,#leftMenuOpenedState,#topMenuClosedState,#topMenuOpenedState{display:none}#contentWrap{position:relative}}#content{margin:0 auto;width:100%;max-height:0;overflow:hidden}#content:after{content:"Screen is too small";position:absolute;top:133.33333333px;left:0;width:100%;margin:auto;text-align:center;opacity:.5}@media (min-width:400px){#content{max-height:inherit}#content:after{content:""}}#bricks{clear:both}#bricks>article{background-color:#fff}.not-supported-browser{color:#000;background-color:#fff;padding:30px;height:100%}.feedback-panel{position:absolute;color:#fff;background-color:#0071c5;font-weight:400;padding:10px;width:200px;height:70px;transition:left ease-in-out .5s 2s,top ease-in-out .5s 2s}.feedback-panel:after{content:"";position:absolute;width:0;height:0}.feedback-panel.left{left:0;bottom:30px}.feedback-panel.left:after{left:220px;top:0;border:45px solid transparent;border-left-width:20px;border-left-color:#0071c5;border-right:0}.feedback-panel.top{right:30px;top:140px}.feedback-panel.top:after{left:100px;top:90px;border:60px solid transparent;border-top-width:20px;border-top-color:#0071c5;border-bottom-width:0}.feedback-panel.closed.left{left:-240px}.feedback-panel.closed.top{top:-110px}.feedback-panel .question{margin-bottom:.8em}.feedback-panel a.button{display:inline-block;color:#fff;padding:5px 10px;margin-bottom:5px;text-decoration:none;background-color:rgba(255,255,255,.2);border-radius:5px}.feedback-panel a.button:hover{background-color:rgba(255,255,255,.3)}.icon:after{display:inline-block;width:1em;height:1em;line-height:1em;vertical-align:bottom}.icon.menu:after{content:url(data:image/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20viewBox%3D%270%200%2020%2020%27%20fill%3D%27%23FFF%27%3E%3Crect%20x%3D%273%27%20y%3D%273%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3Crect%20x%3D%273%27%20y%3D%2713%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3Crect%20x%3D%273%27%20y%3D%278%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3C%2Fsvg%3E)}.icon.email:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpath fill='white' d='M10,0L2,6v9.5h16V6L10,0z M10,1.5L16,6l-6,3.2L4,6L10,1.5z M16.5,14.2h-13v-7l6.5,3.3L16.5,7V14.2z'/%3E%3C/svg%3E")}.icon.intel:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 293 293' fill='white'%3E%3Cpath d='M 291.28737,55.058565 C 277.50683,-12.11587 147.525,-16.369963 63.737895,34.808098 L 63.737895,40.460487 C 147.41497,-2.7316788 266.14859,-2.4562338 276.95315,59.422033 C 280.59385,79.920851 269.12707,101.24048 248.56679,113.50757 L 248.56679,129.56114 C 273.31695,120.47998 298.61629,91.08816 291.28737,55.058565 M 138.92617,172.67053 C 81.102616,178.02238 20.853265,169.59848 12.418485,124.23687 C 8.2296052,101.90025 18.426965,78.192925 31.877897,63.483967 L 31.877897,55.608451 C 7.6234054,76.957185 -5.5514356,103.96031 2.0555709,135.84419 C 11.756767,176.75954 63.462304,199.91949 142.39931,192.20804 C 173.65361,189.19069 214.55609,179.09205 242.94244,163.42581 L 242.94244,141.16845 C 217.14544,156.61695 174.47938,169.38024 138.92617,172.67053 z'/%3E%3Cpath d='M 238.31142,45.347552 L 223.15342,45.347552 L 223.15342,113.16 C 223.15342,121.1244 226.95767,128.05143 238.31142,129.14959'/%3E%3Cpath d='M 57.729915,70.130433 L 42.57092,70.130433 L 42.57092,114.42193 C 42.57092,122.38934 46.375174,129.31386 57.729915,130.41152'/%3E%3Cpath d='M 188.42548,81.588742 C 183.30029,81.588742 179.33248,84.253255 177.67794,87.85316 C 176.68611,90.022849 176.3545,91.672094 176.19095,94.338616 L 199.39759,94.338616 C 199.06648,87.82553 196.14302,81.588742 188.42548,81.588742 M 176.19095,104.61387 C 176.19095,112.33513 181.03904,118.01982 189.52834,118.01982 C 196.19954,118.01982 199.50663,116.15155 203.3654,112.33563 L 212.62645,121.26456 C 206.67299,127.14267 200.44444,130.71544 189.41981,130.71544 C 175.03357,130.71544 161.25152,122.82991 161.25152,99.85904 C 161.25152,80.21479 173.26798,69.115166 189.0887,69.115166 C 205.12948,69.115166 214.33351,82.110693 214.33351,99.173822 L 214.33351,104.61437 L 176.19095,104.61437'/%3E%3Cpath d='M 98.576374,82.329722 C 102.98533,82.329722 104.80493,84.500918 104.80493,88.045061 L 104.80493,129.78055 L 119.85388,129.78055 L 119.85388,87.990304 C 119.85388,79.499933 115.33339,70.129931 102.15955,70.129931 L 71.125329,70.129931 L 71.125329,129.78005 L 86.118769,129.78005 L 86.118769,82.329219'/%3E%3Cpath d='M 148.57135,129.69867 C 136.2803,129.69867 131.09859,121.12491 131.09859,112.66317 L 131.09859,53.837923 L 146.09203,53.837923 L 146.09203,70.130433 L 157.44627,70.130433 L 157.44627,82.329722 L 146.09203,82.329722 L 146.09203,111.75641 C 146.09203,115.21716 147.74557,117.11508 151.32725,117.11508 L 157.44627,117.11508 L 157.44627,129.69867 L 148.57135,129.69867'/%3E%3Crect x='42.570919' y='47.40823' width='15.158995' height='14.425744'/%3E%3Ctext x='244.26051' y='55.85825'%3ER%3C/text%3E%3C/svg%3E")}.icon.flag:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='%23BE3B26' points='4.5,0 11,3 10,6 13,11 7,9 7,7.5 4.5,7 4.5,15.5 3,15.5 3,0'/%3E%3C/svg%3E")}.icon.cross:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='%230071C5' points='1,17 3,19 10,12 17,19 19,17 12,10 19,3 17,1 10,8 3,1 1,3 8,10'/%3E%3C/svg%3E")}.icon.arrow-s-w:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='grey' points='19,3 17,1 5.5,12.5 1,8 1,19 12,19 7.5,14.5'/%3E%3C/svg%3E")}.tooltipped[data-tip-id]:not(.icon){cursor:help;border-bottom-width:1px;border-bottom-style:dashed}h3.tooltipped[data-tip-id],h4.tooltipped[data-tip-id]{display:table}.tip>div[data-id]{position:fixed;padding:.75em;display:none;max-width:35%;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box;background-color:rgba(255,255,255,.9);border:0 solid #000;box-shadow:rgba(0,0,0,.25) 0 5px 8px;z-index:10}.tip>div[data-id]:after{position:absolute;content:"";height:0;border:5px solid transparent}.tip>div[data-id].north{border-bottom-width:5px}.tip>div[data-id].north:after{bottom:-16px;border-top:6px solid #000}.tip>div[data-id].south{border-top-width:5px}.tip>div[data-id].south:after{top:-16px;border-bottom:6px solid #000}.tip>div[data-id].west:after{left:15px}.tip>div[data-id].east:after{right:1px}.tip>div[data-id].visible{display:block}.tip>div[data-id$=_ISSUE]{border-color:#bf3600}.tip>div[data-id$=_ISSUE].north:after{border-top-color:#bf3600}.tip>div[data-id$=_ISSUE].south:after{border-bottom-color:#bf3600}@media (min-width:650px){.tip>div{max-width:30%}}@media (min-width:850px){.tip>div{max-width:25%}}@media (min-width:1100px){.tip>div{max-width:20%}}body .icon.flag{color:inherit;transition:color .5s ease .5s}body .icon.flag:after{width:0;transition:width .5s ease .5s}body.withSuggestions .icon.flag{color:#bf3600}body.withSuggestions .icon.flag:after{width:1em}body article#suggestion{position:relative;padding:1.5em;margin-bottom:2.5em;margin-left:1.5em;border:2px solid gray;border-radius:4px;width:0%;max-height:0;opacity:.5;float:right;background-color:#fff;transition:max-height .75s ease 0s,width .5s ease .25s}body article#suggestion::after{content:"";position:absolute;width:0;height:0;left:10px;bottom:-10px;border-bottom:10px solid transparent;border-right:25px solid gray;transition:left .5s ease 0s,bottom .5s ease 0s,border-bottom-width .5s ease 0s,border-right-width .5s ease 0s,border-color .5s ease 0s}body article#suggestion section.aspect_map,body article#suggestion section[data-id=SUGGESTION_TEXT]{opacity:0;overflow:hidden;transition:opacity .5s ease 0s}body.withSuggestions article#suggestion{opacity:1;width:85%;max-height:700px;border-color:#0071c5;transition:max-height .5s ease .25s,width .5s ease 0s,border-color .5s ease 0s}body.withSuggestions article#suggestion::after{left:20px;bottom:-20px;border-bottom-width:20px;border-right-width:80px;border-right-color:#0071c5}body.withSuggestions article#suggestion section.aspect_map,body.withSuggestions article#suggestion section[data-id=SUGGESTION_TEXT]{opacity:1;transition:opacity .25s ease .5s}@media (min-width:625px){body.withSuggestions article#suggestion{width:50%}}a.button.suggestion{position:absolute;right:.9em;top:.75em}section[data-id=SUGGESTION_TEXT]{margin-bottom:1.5em}section[data-id=SUGGESTION_TEXT] span#first_line{font-size:150%;color:#0071c5}section.aspect_map{width:100%;display:table}section.aspect_map>section.aspect{display:table-row;height:1.6em}section.aspect_map>section.aspect>span{display:table-cell;vertical-align:middle;white-space:nowrap;padding-left:.5em}section.aspect_map>section.aspect>span:first-child{display:inline-block;padding-left:0}section.aspect_map>section.aspect>span:nth-child(2){text-align:right}section.aspect_map>section.aspect>span:not(.icon):nth-child(2){padding-right:1em}section.aspect_map>section.aspect>span:nth-child(3){padding-right:1em}section.aspect_map>section.aspect>span:last-child{position:relative;width:60%}section.aspect_map>section.aspect>span:last-child>span:not([data-tip-id]){position:absolute;left:0;right:100%;top:.5em;height:.6em;background-color:#bf3600}section.aspect_map>section.aspect:first-child{color:gray;text-align:center;font-size:.75em}article[data-id=INFO]{color:gray;margin-bottom:20px;padding-right:2em;font-size:80%}article[data-id=INFO] span[data-id]:last-child{font-style:italic;word-wrap:break-word;word-break:break-all;padding-left:.5em;color:#000}h1{font-size:2.5em;color:rgba(128,128,128,.5);margin:0 0 20px;display:none}h1 sup{display:block;font-size:.4em;font-weight:400;line-height:.8em}@media (min-width:850px){h1{display:block}}.aux-metric,.top-metric{display:inline-block;font-size:1em;font-weight:400;margin-bottom:1.5em}.aux-metric [data-id]:first-child,.top-metric [data-id]:first-child{display:block;padding-top:.75em;font-size:2.2em;font-weight:500;color:#00aeef}.top-metric{padding-right:10%}.aux-metric{padding-right:2em}.aux-metric:first-child{border-left:1px solid #00aeef}.aux-metric:last-child{border-right:1px solid #00aeef}div#content article[data-id=WALLCLOCK]>span[data-id]:first-child{font-size:3.4em;padding-top:0}div#bricks article{color:gray}div#bricks article h3{margin:0;font-size:1.5em;font-weight:400;color:#00aeef}div#bricks article h4{margin:.8em 0 0;padding:0;font-size:1em;font-weight:400;color:#000}div#bricks article .indent{margin-left:1.75em}div#bricks article table{margin-top:.8em;width:100%;border-collapse:collapse}div#bricks article table tr{border-bottom:1px solid rgba(128,128,128,.25)}div#bricks article table tr td,div#bricks article table tr th{text-align:right}div#bricks article table tr td:first-child,div#bricks article table tr th:first-child{text-align:left;white-space:normal}div#bricks article table tr td{padding:.2em;position:relative;overflow:hidden;white-space:nowrap}div#bricks article table tr th{font-weight:400;padding:.2em 0;border-bottom:2px solid rgba(128,128,128,.25)}div#bricks article table tr th:first-child{color:#000}div#bricks article table tr .progress{position:absolute;width:0%;height:100%;background:#00aeef;opacity:.15;left:0;top:0;-webkit-animation:filler 4s ease-in-out;-moz-animation:filler 4s ease-in-out;animation:filler 4s ease-in-out}div[data-id=MEMORY_INFO]>div,div[data-id=RSS_INFO]>div{margin-left:1.5em}div[data-id=MEMORY_INFO]>span,div[data-id=RSS_INFO]>span{margin-left:1em}</style><script>var json={"INFO":{"APPLICATION_NAME":"swift_mpi","NUM_RANKS":"4","RANKS_PER_NODE":"1","STAT_FILES":"/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples/HydroTests/SodShock_3D/aps_result_20210121","CREATION_DATETIME":"2021-01-21 15:42:21"},"VECTORIZATION":null,"SP_FLOPS":null,"SP_PACKED":null,"SP_128_BIT":null,"SP_256_BIT":null,"SP_512_BIT":null,"SP_SCALAR":null,"DP_FLOPS":null,"DP_PACKED":null,"DP_128_BIT":null,"DP_256_BIT":null,"DP_512_BIT":null,"DP_SCALAR":null,"X87":null,"AVG_FREQUENCY":null,"NON_FP":null,"FP_ARITH_MEM_RD":null,"FP_ARITH_MEM_WR":null,"CACHE_BOUND":null,"DRAM_BOUND":null,"DRAM_BANDWIDTH":null,"PERS_MEM_BOUND":null,"PERS_MEM_BANDWIDTH":null,"MCDRAM_BANDWIDTH":null,"DRAM_HIT_RATIO":null,"DRAM_CACHE_HIT":null,"DRAM_CACHE_MISS":null,"NUMA_USE":null,"BANDWIDTH_AVG_OUT":null,"BANDWIDTH_AVG_IN":null,"PACKRATE_AVG_OUT":null,"PACKRATE_AVG_IN":null,"CPU_USAGE":null,"AVERAGE_CPU_USE":null,"CPU_OUT_OF":null,"PHYSICAL_CORE_USAGE":null,"PH_CORE_USE":null,"PH_CORE_OUT_OF":null,"BACK_END_BOUND":null,"L2_HIT_BOUND":null,"L2_MISS_BOUND":null,"SIMD_PER_CYCLE":null,"PACKED_SIMD":null,"SCALAR_SIMD":null,"CPI_RATE":null,"SPGFLOPS":null,"DPGFLOPS":null,"X87GFLOPS":null,"WALLCLOCK":{"WALLCLOCK_SEVERITY":"severityUnknown","WALLCLOCK_VAL":"49.75s","WALLCLOCK_PERC":"0.00%","WALLCLOCK_DESCR":"Total application wall-time."},"MPI_TIME":{"MPI_TIME_SEVERITY":"severityCritical","MPI_TIME_VAL":"26.34s","MPI_TIME_PERC":"70.70%","MPI_TIME_DESCR":"Time spent inside the MPI library. Values more than 10% might need additional exploration on MPI communication efficiency. This might be caused by high wait times inside the library, active communications, non-optimal settings of the MPI library. See MPI Imbalance metric to see if the application has load balancing problem. Use <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore communication efficiency.","MPI_TIME_ISSUE":"Your application is MPI bound. This may be caused by high busy wait time inside the library (imbalance), non-optimal communication schema or MPI library settings. Explore the MPI Imbalance metric if it is available or use <a href='https://software.intel.com/en-us/get-started-with-itac' target='_blank'>MPI profiling tools</a> like <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore possible performance bottlenecks."},"MPI_IMBALANCE":{"MPI_IMBALANCE_SEVERITY":"severityUnknown","MPI_IMBALANCE_VAL":"2.25s","MPI_IMBALANCE_PERC":"6.03%","MPI_IMBALANCE_DESCR":"Mean unproductive wait time per process spent in the MPI library calls when a process is waiting for data."},"NON_MPI_TIME":null,"OMP_REGIONS_TIME":null,"OMP_IMBALANCE_TIME":null,"OMP_SERIAL_TIME":null,"MEMORY":{"PEAK_MEM_RANK":"Rank: 1","PEAK_MEM_VAL":"1084.47 MB","MEAN_MEM_VAL":"1001.44 MB","MEM_DESCR":"Per-process memory usage affects the application scalability."},"RSS":{"PEAK_RSS_RANK":"Rank: 1","PEAK_RSS_VAL":"584.81 MB","MEAN_RSS_VAL":"539.97 MB","RSS_DESCR":"RSS usage affects the application scalability."},"MEMORY_PER_NODE":{"PEAK_MEM_NODE":"Node: b106.pri.cosma7.alces.network","PEAK_MEM_PER_NODE_VAL":"1084.47 MB","MEAN_MEM_PER_NODE_VAL":"1001.44 MB","MEM_PER_NODE_DESCR":"Per-node memory usage affects the application scalability."},"RSS_PER_NODE":{"PEAK_RSS_NODE":"Node: b106.pri.cosma7.alces.network","PEAK_RSS_PER_NODE_VAL":"584.81 MB","MEAN_RSS_PER_NODE_VAL":"539.97 MB","RSS_PER_NODE_DESCR":"Per-node RSS usage affects the application scalability."},"TOP_5":{"Test":"18.90","Isend":"17.95","Irecv":"8.56","Allreduce":"2.68","Waitall":"1.61"},"MEMORY_BOUND":null,"IO":{"READ_BYTES_SEVERITY":"severityUnknown","READ_BYTES_DESCR":"Amount of bytes read by the application. High volumes of I/O operations may lead to unproductive wait.","WRITTEN_BYTES_SEVERITY":"severityUnknown","WRITTEN_BYTES_DESCR":"Amount of bytes written by the application. High volumes of I/O operations may lead to unproductive wait.","IOWAIT_USEC_SEVERITY":"severityLow","IOWAIT_USEC_VAL":"0.01s","IOWAIT_USEC_PERC":"0.03%","IOWAIT_USEC_DESCR":"This is the time the application spends waiting for an I/O operation to complete.","READ_BYTES_AVG_VAL":" 2.0 MB","READ_BYTES_MAX_VAL":" 7.9 MB","READ_BYTES_MAX_RANK":"Rank: 1","WRITTEN_BYTES_AVG_VAL":" 3.9 MB","WRITTEN_BYTES_MAX_VAL":" 15.6 MB","WRITTEN_BYTES_MAX_RANK":"Rank: 1","IOWAIT_USEC_AVG_VAL":"0.01","IOWAIT_USEC_AVG_PERC":"0.03%","IOWAIT_USEC_MAX_VAL":"0.04","IOWAIT_USEC_MAX_PERC":"0.08%","IOWAIT_USEC_MAX_RANK":"Rank: 1"},"SUGGESTION":{"TEXT":"<span id='first_line'>Your application is MPI bound.</span><br> This may be caused by high busy wait time inside the library (imbalance), non-optimal communication schema or MPI library settings. Use <a href='https://software.intel.com/en-us/get-started-with-itac' target='_blank'>MPI profiling tools</a> like <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore performance bottlenecks.","STYLE":"bad"}};</script><script>function drawProgress(){for(var e=document.querySelectorAll(".progress"),t=0;t<e.length;t++)e[t].style.width=e[t].dataset.progress+"%"}function drawThresholdBars(e){var t={BACK_END_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},BANDWIDTH_AVG_OUT:{threshold:8.75,lessIsBetter:!0,perc:!1},BANDWIDTH_AVG_IN:{threshold:8.75,lessIsBetter:!0,perc:!1},CACHE_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},CPI_RATE:{threshold:1,lessIsBetter:!0,perc:!1},CPU_USAGE:{threshold:90,lessIsBetter:!1,perc:!0},DP_128_BIT:{threshold:5,lessIsBetter:!0,perc:!0},DP_256_BIT:{threshold:5,lessIsBetter:!0,perc:!0},DP_SCALAR:{threshold:30,lessIsBetter:!0,perc:!0},DRAM_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},FP_ARITH_MEM_RD:{threshold:.5,lessIsBetter:!1,perc:!1},FP_ARITH_MEM_WR:{threshold:.5,lessIsBetter:!1,perc:!1},IO:{threshold:10,lessIsBetter:!0,perc:!0},L2_HIT_BOUND:{threshold:10,lessIsBetter:!0,perc:!0},L2_MISS_BOUND:{threshold:15,lessIsBetter:!0,perc:!0},MEMORY_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},MPI_IMBALANCE:{threshold:30,lessIsBetter:!0,perc:!1},MPI_TIME:{threshold:10,lessIsBetter:!0,perc:!0},NUMA_USE:{threshold:15,lessIsBetter:!0,perc:!0},OMP_IMBALANCE_TIME:{threshold:10,lessIsBetter:!0,perc:!0},OMP_SERIAL_TIME:{threshold:15,lessIsBetter:!0,perc:!0},PACKRATE_AVG_IN:{threshold:105,lessIsBetter:!0,perc:!1},PACKRATE_AVG_OUT:{threshold:105,lessIsBetter:!0,perc:!1},PERS_MEM_BANDWIDTH:{threshold:42,lessIsBetter:!0,perc:!1},PERS_MEM_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},PHYSICAL_CORE_USAGE:{threshold:80,lessIsBetter:!1,perc:!0},SCALAR_FP:{threshold:15,lessIsBetter:!0,perc:!0},SCALAR_SIMD:{threshold:15,lessIsBetter:!0,perc:!0},SIMD_PER_CYCLE:{threshold:1,lessIsBetter:!1,perc:!1},SP_128_BIT:{threshold:5,lessIsBetter:!0,perc:!0},SP_256_BIT:{threshold:5,lessIsBetter:!0,perc:!0},SP_SCALAR:{threshold:30,lessIsBetter:!0,perc:!0},VECTORIZATION:{threshold:70,lessIsBetter:!1,perc:!0}};null!=e.BACK_END_BOUND&&(t.CPI_RATE.threshold=2);var s,r,o,n=["BACK_END_BOUND","CPU_USAGE","IO","MEMORY_BOUND","MPI_TIME","OMP_IMBALANCE_TIME","OMP_SERIAL_TIME","PHYSICAL_CORE_USAGE","SIMD_PER_CYCLE","VECTORIZATION"];for(var l in t)if(null!=e[l]){var i=-1<n.indexOf(l);if(o=t[l].perc?"_PERC":"_VAL",s=parseFloat(e[l][l+o]),"MPI_IMBALANCE"==l){var a=parseFloat(e.MPI_TIME.MPI_TIME_VAL),d=parseFloat(e.WALLCLOCK.WALLCLOCK_VAL);s=0<a?100*s/a:0,0<d&&100*a/d<=15&&(t[l].threshold=100),o="_PERC"}if(null!=(r=i?document.querySelector("[data-id="+l+"_BAR]"):document.querySelector("[data-id="+l+o+"]"))&&(t[l].lessIsBetter&&s>=t[l].threshold||!t[l].lessIsBetter&&s<=t[l].threshold)){if("SP_256_BIT"==l&&null==e.SP_512_BIT||"DP_256_BIT"==l&&null==e.DP_512_BIT)continue;if(i&&(r.style.right=95-95/90*Math.abs(s-t[l].threshold)+"%"),null!=(r=document.querySelectorAll("[data-id="+l+o+"]")))for(var u=0;u<r.length;u++)r[u].classList.add("icon"),r[u].classList.add("flag"),null!=e[l][l+"_ISSUE"]&&r[u].classList.add("tooltipped")}}}function makeInvisible(e){var t=document.querySelectorAll(e);if(null!=t)for(var s=0;s<t.length;s++)t[s].style.display="none"}function render(e){var t;if(null!=e.SUGGESTION&&(null!=(t=document.querySelector("[data-id=SUGGESTION_TEXT]"))&&(t.innerHTML=e.SUGGESTION.TEXT),null!=(t=document.getElementById("suggestion"))&&t.classList.add(e.SUGGESTION.STYLE)),null!=(t=document.querySelectorAll("[data-id=INFO] span")))for(var s=0;s<t.length;s++)null==e.INFO[t[s].dataset.id]&&makeInvisible("div[data-id="+t[s].dataset.id+"_STR]");for(var r in e){var o=e[r];if(null==o)makeInvisible("MEMORY"==r?"[data-id=MEMORY]":"[data-id^="+r+"]");else if("TOP_5"==r){var n=document.querySelector("[data-id=TOP_5]");for(var l in o){var i=document.createElement("tr");i.innerHTML="<td><span class='progress' data-progress='"+o[l]+"'></span>"+l+"</td><td>"+o[l]+"</td>",n.appendChild(i)}drawProgress()}else for(var a in o)if(null!=(t=document.querySelectorAll("[data-id="+a+"]")))for(s=0;s<t.length;s++)t[s].innerHTML=o[a]}drawThresholdBars(e);for(var d=["DRAM_BANDWIDTH","MCDRAM_BANDWIDTH"],u=0;u<d.length;u++)if(null!=e[d[u]])for(t=document.querySelectorAll("[data-id="+d[u]+"_VAL]"),s=0;s<t.length;s++)t[s].classList.remove("tooltipped"),t[s].dataset.tipId=d[u]+"_ISSUE";else if(makeInvisible("[data-id="+d[u]+"_EXTRA]"),"DRAM_BANDWIDTH"==d[u]||null!=e.INFO.HW_PLATFORM&&-1<e.INFO.HW_PLATFORM.indexOf("Knight"))for(t=document.querySelectorAll("[data-id="+d[u]+"_TITLE],[data-id="+d[u]+"_VAL]"),s=0;s<t.length;s++)t[s].style.display="";if(null==e.BANDWIDTH_AVG_OUT&&null==e.PACKRATE_AVG_OUT?makeInvisible("[data-id=FABRIC_USAGE]"):null==e.BANDWIDTH_AVG_OUT?makeInvisible("[data-id=BANDWIDTH_AVG_TITLE]"):null==e.PACKRATE_AVG_OUT&&makeInvisible("[data-id=PACKRATE_AVG_TITLE]"),null!=(t=document.querySelectorAll(".tip > div[data-id$=_ISSUE]")))for(s=0;s<t.length;s++)if(""==t[s].innerHTML||null==t[s].innerHTML){var c=document.querySelectorAll("[data-tip-id="+t[s].dataset.id+"].tooltipped");if(null!=c)for(var p=0;p<c.length;p++)c[p].classList.remove("tooltipped")}else{var h=document.querySelectorAll("[data-tip-id="+t[s].dataset.id+"]:not(.tooltipped)");if(null!=h)for(p=0;p<h.length;p++)h[p].classList.add("icon"),h[p].classList.add("flag"),h[p].classList.add("tooltipped")}if(null!=(t=document.querySelectorAll(".tip > div[data-id$=_DESCR]")))for(s=0;s<t.length;s++)if("MEM_DESCR"!=t[s].dataset.id&&(""==t[s].innerHTML||null==t[s].innerHTML)){var _=t[s].dataset.id.indexOf("_DESCR");makeInvisible("[data-id^="+t[s].dataset.id.substr(0,_)+"]")}null==e.MPI_TIME?makeInvisible("[data-id$=_INFO],[data-id=NUM_RANKS_STR],[data-id=RANK_STR],[data-id=MEMORY_DATA_BLOCK]>table,[data-id=FABRIC_USAGE]"):makeInvisible("[data-id=MEMORY_APP],[data-id=RSS_APP]"),null!=e.PCONTROL&&1==e.PCONTROL?makeInvisible("[data-id$=_DATA_BLOCK],section[data-id=IO]"):makeInvisible("[data-id$=_TEXT_BLOCK]"),null!=e.SP_FLOPS&&0==parseFloat(e.SP_FLOPS.SP_FLOPS_PERC)&&makeInvisible("[data-id=SP_FLOPS_BITS]"),null!=e.DP_FLOPS&&0==parseFloat(e.DP_FLOPS.DP_FLOPS_PERC)&&makeInvisible("[data-id=DP_FLOPS_BITS]"),initTooltips()}function isBrowserSupported(){var e,t,s={"Trident/":{version:11,offset:3},"Edge/":{version:12,offset:5},Firefox:{version:17,offset:8},Chrome:{version:40,offset:7},Safari:{version:8,offset:7}},r=(navigator.appVersion,navigator.userAgent),o=(navigator.appName,""+parseFloat(navigator.appVersion)),n=parseInt(navigator.appVersion,10);for(var l in s)if(-1!=(e=r.indexOf(l))&&("Trident/"==l&&(e=r.indexOf("rv:")),-1!=(t=(o=r.substring(e+s[l].offset)).indexOf(";"))&&(o=o.substring(0,t)),-1!=(t=o.indexOf(" "))&&(o=o.substring(0,t)),-1!=(t=o.indexOf(")"))&&(o=o.substring(0,t)),n=parseInt(""+o,10),isNaN(n)&&(o=""+parseFloat(navigator.appVersion),n=parseInt(navigator.appVersion,10)),n>=s[l].version))return!0;return!1}function renderForNotSupportedBrowsers(){var e=document.querySelectorAll("body > div:not(.not-supported-browser)");for(var t in e)e[t].parentNode&&e[t].parentNode.removeChild(e[t])}function renderForSupportedBrowsers(){var e=document.querySelector("body > div.not-supported-browser");e.parentNode&&e.parentNode.removeChild(e)}function masonryLayout(e){var t=e,a={columns:4,gap:15,padding:10,breakAt:{1100:3,850:2,625:1}};function s(){var e,n=function(){var e=document.body.clientWidth,t=a.columns;for(var s in a.breakAt)if(e<s){t=a.breakAt[s];break}return t}();if(1===n)t.style.height="auto",r(t.children,function(e,t){t.removeAttribute("style"),t.style.padding=a.padding+"px",t.style.marginBottom=a.gap+"px"});else{var l="calc("+100/(e=n)+"% - "+(2*a.padding+(e-1)*a.gap/e)+"px)",i=function(e,t){for(var s=[],r=0;r<e;r++)s.push(0);return s}(n);r(t.children,function(e,t){var s,r,o;t.style.position="absolute",t.style.width=l,t.style.left=0===(o=e%n)?"0":"calc("+100/n*o+"% + "+a.gap*o/n+"px)",t.style.top=i[e%n]+"px",t.style.padding=a.padding+"px",i[e%n]+=(s=t,r=parseInt(window.getComputedStyle(s,null).getPropertyValue("height"),10),isNaN(r)?0:r+a.gap+2*a.padding)}),t.style.height=Math.max.apply(null,i)+"px"}}function r(e,t){for(var s=0,r=0,o=e.length;s<o;s++)"none"!==window.getComputedStyle(e[s],null).getPropertyValue("display")&&(t(r,e[s]),r++)}window.addEventListener("resize",s),t.style.position="relative",s()}var sendAnalyticsVersion="APS%202019%20G",sendAnalyticsReportPage="%2F";function setupAnalytics(e){sendAnalyticsReportPage=e}function sendAnalytics(e){}function feedbackInit(){document.querySelectorAll(".feedback-panel").forEach(function(e){e.classList.remove("closed"),e.querySelector(".question").innerHTML="Like this report?"})}function feedback(e){var t=document.querySelectorAll(".feedback-panel");NodeList.prototype.forEach=Array.prototype.forEach,t.forEach(function(e){e.querySelector(".question").innerHTML="Thank you",e.classList.add("closed")}),sendAnalytics(e)}function menu(){var e,t=document.body,s=document.getElementById("topMenuClosedState"),r=document.getElementById("leftMenuClosedState");function o(){t.classList.add("menuOpened"),feedbackInit()}s.onmouseenter=r.onmouseenter=function(){e=setTimeout(o,800)},s.onmouseleave=r.onmouseleave=function(){clearTimeout(e)},s.onclick=r.onclick=o,document.getElementById("content").onmouseenter=function(){t.classList.remove("menuOpened")}}function suggestionShow(){document.body.classList.add("withSuggestions");var e=document.querySelector("a.button.suggestion");e.classList.remove("arrow-s-w"),e.classList.add("cross"),e.onclick=suggestionHide,initTooltips(),sendAnalytics("suggestionShow")}function suggestionHide(){document.body.classList.remove("withSuggestions");var e=document.querySelector("a.button.suggestion");e.classList.remove("cross"),e.classList.add("arrow-s-w"),e.onclick=suggestionShow,removeTooltipFromFlags(),sendAnalytics("suggestionHide")}function initTooltips(){for(var e=document.querySelectorAll(".tooltipped[data-tip-id]"),t=0;t<e.length;t++){var o,s,r,n=e[t];n.onmouseenter=function(){clearTimeout(o),void 0!==r&&i(r),r=this,s=setTimeout(l,200,this)},n.onmouseleave=function(){clearTimeout(s),o=setTimeout(i,800,this)}}function l(e){if(void 0!==e){var t=document.querySelector(".tip > div[data-id="+e.dataset.tipId+"]");if(null!=t){t.classList.add("visible");var s=e.getBoundingClientRect(),r=t.getBoundingClientRect();0<window.innerHeight-s.top-r.height?(t.classList.add("south"),t.style.top=s.bottom+8+"px"):(t.classList.add("north"),t.style.top=s.top-r.height-8+"px"),0<window.innerWidth-s.left-r.width?(t.classList.add("west"),t.style.left=s.left+"px"):(t.classList.add("east"),t.style.right=window.innerWidth-s.right+"px"),t.onmouseenter=function(){clearTimeout(o)},t.onmouseleave=function(){i(e)}}}}function i(e){if(void 0!==e){var t=document.querySelector(".tip > div[data-id="+e.dataset.tipId+"]");null!=t&&(t.removeAttribute("style"),t.classList.remove("visible","north","south","east","west"))}}}function removeTooltipFromFlags(){for(var e=document.querySelectorAll("[data-tip-id].icon.flag"),t=0;t<e.length;t++)e[t].onmouseenter=e[t].onmouseleave=null}function getDataFromNexus(){for(var e="",t="",s=document.location.search.slice(1).split("&"),r=0;r<s.length;r++){var o=s[r].split("=");"nexus"===o[0]&&o[1]&&(e="http://"+o[1]),"path"===o[0]&&(t=o[1])}var n=e+"/repository"+t+"/data/obj.json",l=new XMLHttpRequest;l.open("GET",n,!0),l.onload=function(e){4===l.readyState&&200==l.status&&(json=JSON.parse(l.responseText),setUpAndRender())},l.send(null)}function setUpAndRender(){isBrowserSupported()?(renderForSupportedBrowsers(),sendAnalytics("pageOpen"),menu(),render(json),masonryLayout(document.querySelector("#bricks"))):renderForNotSupportedBrowsers()}window.onload=function(){NodeList.prototype.forEach=Array.prototype.forEach,setUpAndRender()};</script></head><body class="withSuggestions"><div id="leftMenuOpenedState" class="anim"><h4>Additional Performance Analysis Tools:</h4><a href="https://software.intel.com/en-us/intel-trace-analyzer">Intel® Trace Analyzer and Collector</a><div>MPI Analyzer and Profiler</div><a href="https://software.intel.com/en-us/intel-vtune-amplifier-xe">Intel® VTune™ Amplifier</a><div>Performance Profiler</div><a href="https://software.intel.com/en-us/intel-advisor-xe">Intel® Advisor</a><div>Vectorization Optimization & Thread Prototyping</div><a href="http://www.intel.com/storage-snapshot">Storage Performance Snapshot</a><div>Visualize System Storage Bottlenecks</div><section class="feedback-panel left"><div class="question"></div><a class="button like" href="#" onclick="feedback('apsLike');">Yes</a> <a class="button dislike" href="#" onclick="feedback('apsDislike');">No</a> <a class="button icon email" href="mailto:parallel.studio.support@intel.com?Subject=MPI%20Performance%20Snapshot:%20feedback" target="_top"></a></section></div><div id="leftMenuClosedState" class="anim"><div class="icon menu"></div><div class="icon intel"></div></div><div id="topMenuOpenedState" class="anim"><h4>Additional Performance Analysis Tools:</h4><a href="https://software.intel.com/en-us/intel-trace-analyzer">Intel® Trace Analyzer and Collector</a> <span>- MPI Analyzer and Profiler</span><br><a href="https://software.intel.com/en-us/intel-vtune-amplifier-xe">Intel® VTune™ Amplifier</a> <span>- Performance Profiler</span><br><a href="https://software.intel.com/en-us/intel-advisor-xe">Intel® Advisor</a> <span>- Vectorization Optimization & Thread Prototyping</span><br><a href="http://www.intel.com/storage-snapshot">Storage Performance Snapshot</a> <span>- Visualize System Storage Bottlenecks</span><section class="feedback-panel top"><div class="question"></div><a class="button like" href="#" onclick="feedback('apsLike');">Yes</a> <a class="button dislike" href="#" onclick="feedback('apsDislike');">No</a> <a class="button icon email" href="mailto:parallel.studio.support@intel.com?Subject=MPI%20Performance%20Snapshot:%20feedback" target="_top"></a></section></div><div id="topMenuClosedState" class="anim"><span class="icon intel"></span> <span class="icon menu"></span> <sup>Intel® VTune™ Amplifier</sup> Application Performance Snapshot</div><div id="contentWrap" class="anim"><div id="content"><h1><sup>Intel® VTune™ Amplifier</sup>Application Performance Snapshot</h1><article id="suggestion"><a class="button suggestion icon cross" href="#" onclick="suggestionHide();"></a><section data-id="SUGGESTION_TEXT"></section><section class="aspect_map"><section class="aspect"><span></span> <span class="aspect_value"><span class="tooltipped" data-tip-id="current_run">Current run</span></span> <span class="aspect_target"><span class="tooltipped" data-tip-id="target">Target</span></span> <span><span class="tooltipped" data-tip-id="delta">Delta</span></span></section><section class="aspect" data-id="MPI_TIME"><span class="tooltipped" data-id="MPI_TIME_TITLE" data-tip-id="MPI_TIME_DESCR">MPI Time</span> <span class="aspect_value" data-id="MPI_TIME_PERC" data-tip-id="MPI_TIME_ISSUE"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="MPI_TIME_BAR"></span></span></section><section class="aspect" data-id="OMP_SERIAL_TIME"><span class="tooltipped" data-id="OMP_SERIAL_TIME_TITLE" data-tip-id="OMP_SERIAL_TIME_DESCR">Serial Time</span> <span class="aspect_value" data-id="OMP_SERIAL_TIME_PERC" data-tip-id="OMP_SERIAL_TIME_ISSUE"></span> <span class="aspect_target"><15%</span> <span class="aspect_bar"><span data-id="OMP_SERIAL_TIME_BAR"></span></span></section><section class="aspect" data-id="OMP_IMBALANCE_TIME"><span class="tooltipped" data-id="OMP_IMBALANCE_TIME_TITLE" data-tip-id="OMP_IMBALANCE_TIME_DESCR">OpenMP Imbalance</span> <span class="aspect_value" data-id="OMP_IMBALANCE_TIME_PERC" data-tip-id="OMP_IMBALANCE_TIME_ISSUE"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="OMP_IMBALANCE_TIME_BAR"></span></span></section><section class="aspect" data-id="CPU_USAGE"><span class="tooltipped" data-id="CPU_USAGE_TITLE" data-tip-id="CPU_USAGE_DESCR">CPU Utilization</span> <span class="aspect_value" data-id="CPU_USAGE_PERC" data-tip-id="CPU_USAGE_ISSUE"></span> <span class="aspect_target">>90%</span> <span class="aspect_bar"><span data-id="CPU_USAGE_BAR"></span></span></section><section class="aspect" data-id="PHYSICAL_CORE_USAGE"><span class="tooltipped" data-id="PHYSICAL_CORE_USAGE_TITLE" data-tip-id="PHYSICAL_CORE_USAGE_DESCR">Physical Core Utilization</span> <span class="aspect_value" data-id="PHYSICAL_CORE_USAGE_PERC" data-tip-id="PHYSICAL_CORE_USAGE_ISSUE"></span> <span class="aspect_target">>80%</span> <span class="aspect_bar"><span data-id="PHYSICAL_CORE_USAGE_BAR"></span></span></section><section class="aspect" data-id="MEMORY_BOUND"><span class="tooltipped" data-id="MEMORY_BOUND_TITLE" data-tip-id="MEMORY_BOUND_DESCR">Memory Stalls</span> <span class="aspect_value" data-id="MEMORY_BOUND_PERC" data-tip-id="MEMORY_BOUND_ISSUE"></span> <span class="aspect_target"><20%</span> <span class="aspect_bar"><span data-id="MEMORY_BOUND_BAR"></span></span></section><section class="aspect" data-id="BACK_END_BOUND"><span class="tooltipped" data-id="BACK_END_BOUND_TITLE" data-tip-id="BACK_END_BOUND_DESCR">Back-End Stalls</span> <span class="aspect_value" data-id="BACK_END_BOUND_PERC" data-tip-id="BACK_END_BOUND_ISSUE"></span> <span class="aspect_target"><20%</span> <span class="aspect_bar"><span data-id="BACK_END_BOUND_BAR"></span></span></section><section class="aspect" data-id="VECTORIZATION"><span class="tooltipped" data-id="VECTORIZATION_TITLE" data-tip-id="VECTORIZATION_DESCR">Vectorization</span> <span class="aspect_value" data-id="VECTORIZATION_PERC" data-tip-id="VECTORIZATION_ISSUE"></span> <span class="aspect_target">>70%</span> <span class="aspect_bar"><span data-id="VECTORIZATION_BAR"></span></span></section><section class="aspect" data-id="SIMD_PER_CYCLE"><span class="tooltipped" data-id="SIMD_PER_CYCLE_TITLE" data-tip-id="SIMD_PER_CYCLE_DESCR">SIMD Instr. per Cycle</span> <span class="aspect_value" data-id="SIMD_PER_CYCLE_VAL" data-tip-id="SIMD_PER_CYCLE_ISSUE"></span> <span class="aspect_target">>1</span> <span class="aspect_bar"><span data-id="SIMD_PER_CYCLE_BAR"></span></span></section><section class="aspect" data-id="IO"><span class="tooltipped" data-id="IO_TITLE" data-tip-id="IOWAIT_USEC_DESCR" class="tooltipped">I/O Bound</span> <span class="aspect_value" data-id="IOWAIT_USEC_PERC"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="IO_BAR"></span></span></section></section></article><article data-id="INFO"><div data-id="APPLICATION_NAME_STR">Application: <span data-id="APPLICATION_NAME"></span></div><div data-id="CREATION_DATETIME_STR">Report creation date: <span data-id="CREATION_DATETIME"></span></div><div data-id="RANK_STR">Rank: <span data-id="RANK"></span></div><div data-id="NUM_RANKS_STR">Number of ranks: <span data-id="NUM_RANKS"></span></div><div data-id="RANKS_PER_NODE_STR">Ranks per node: <span data-id="RANKS_PER_NODE"></span></div><div data-id="NUM_OMP_THREADS_STR">OpenMP threads<span data-id="POSTFIX"></span>: <span data-id="NUM_OMP_THREADS"></span></div><div data-id="HW_PLATFORM_STR">HW Platform: <span data-id="HW_PLATFORM"></span></div><div data-id="FREQUENCY_STR">Frequency: <span data-id="FREQUENCY"></span></div><div data-id="NUM_LOGIC_CORE_STR">Logical Core Count per node: <span data-id="NUM_LOGIC_CORE"></span></div><div data-id="COLLECTOR_TYPE_STR">Collector type: <span data-id="COLLECTOR_TYPE"></span></div></article><article data-id="WALLCLOCK" class="top-metric"><span data-id="WALLCLOCK_VAL"></span> <span class="tooltipped" data-tip-id="WALLCLOCK_DESCR">Elapsed Time</span></article><article data-id="CPI_RATE" class="top-metric"><span data-id="CPI_RATE_VAL" data-tip-id="CPI_RATE_ISSUE"></span> <span data-tip-id="CPI_RATE_DESCR" class="tooltipped">CPI</span><br><span data-id="CPI_INFO">(<span class="tooltipped" data-tip-id="CPI_RATE_MAX_NODE">MAX</span> <span data-id="CPI_RATE_MAX_VAL"></span>, <span class="tooltipped" data-tip-id="CPI_RATE_MIN_NODE">MIN</span> <span data-id="CPI_RATE_MIN_VAL"></span>)</span></article><article data-id="SPGFLOPS" class="aux-metric"><span data-id="SPGFLOPS_TOTAL_VAL"></span> <span data-tip-id="SPGFLOPS_DESCR" class="tooltipped">Single Precision</span></article><article data-id="DPGFLOPS" class="aux-metric"><span data-id="DPGFLOPS_TOTAL_VAL"></span> <span data-tip-id="DPGFLOPS_DESCR" class="tooltipped">Double Precision</span></article><article data-id="X87GFLOPS" class="aux-metric"><span data-id="X87GFLOPS_TOTAL_VAL"></span> <span data-tip-id="X87GFLOPS_DESCR" class="tooltipped">x87</span></article><article data-id="AVG_FREQUENCY" class="aux-metric"><span data-id="AVG_FREQUENCY_VAL"></span> <span data-tip-id="AVG_FREQUENCY_DESCR" class="tooltipped">Average CPU Frequency</span></article><div id="bricks"><article data-id="MPI_TIME"><h3 data-id="MPI_TIME_TITLE" data-tip-id="MPI_TIME_DESCR" class="tooltipped">MPI Time</h3><span data-id="MPI_TIME_VAL"></span><br><span data-id="MPI_TIME_PERC" data-tip-id="MPI_TIME_ISSUE"></span> of Elapsed Time<br><div class="indent"><h4 data-id="MPI_IMBALANCE_TITLE" data-tip-id="MPI_IMBALANCE_DESCR" class="tooltipped">MPI Imbalance</h4><span data-id="MPI_IMBALANCE_VAL"></span><br><span data-id="MPI_IMBALANCE_PERC" data-tip-id="MPI_IMBALANCE_ISSUE"></span> of Elapsed Time<br></div><table data-id="TOP_5"><tr><th>TOP 5 MPI Functions</th><th><span class="tooltipped" data-tip-id="MPI_PERC_DESCR">%</span></th></tr></table><div data-id="FABRIC_USAGE"><h4>Intel Omni-Path Fabric Usage</h4><table data-id="BANDWIDTH_AVG_TITLE"><tr><th>Interconnect Bandwidth</th><th>AVG, <span data-id="BANDWIDTH_AVG_IN_UNIT"></span></th></tr><tr><td><span data-tip-id="BANDWIDTH_AVG_OUT_DESCR" class="tooltipped">Outgoing:</span></td><td class="cell"><span data-id="BANDWIDTH_AVG_OUT_VAL" data-tip-id="BANDWIDTH_AVG_OUT_ISSUE"></span></td></tr><tr><td><span data-tip-id="BANDWIDTH_AVG_IN_DESCR" class="tooltipped">Incoming:</span></td><td class="cell"><span data-id="BANDWIDTH_AVG_IN_VAL" data-tip-id="BANDWIDTH_AVG_IN_ISSUE"></span></td></tr></table><table data-id="PACKRATE_AVG_TITLE"><tr><th>Interconnect Packet Rate</th><th>AVG, <span data-id="PACKRATE_AVG_OUT_UNIT"></span></th></tr><tr><td><span data-tip-id="PACKRATE_AVG_OUT_DESCR" class="tooltipped">Outgoing:</span></td><td class="cell"><span data-id="PACKRATE_AVG_OUT_VAL" data-tip-id="PACKRATE_AVG_OUT_ISSUE"></span></td></tr><tr><td><span data-tip-id="PACKRATE_AVG_IN_DESCR" class="tooltipped">Incoming:</span></td><td class="cell"><span data-id="PACKRATE_AVG_IN_VAL" data-tip-id="PACKRATE_AVG_IN_ISSUE"></span></td></tr></table></div></article><article data-id="OMP_SERIAL_TIME"><h3 data-id="OMP_SERIAL_TIME_TITLE" data-tip-id="OMP_SERIAL_TIME_DESCR" class="tooltipped">Serial Time</h3><span data-id="OMP_SERIAL_TIME_VAL"></span><br><span data-id="OMP_SERIAL_TIME_PERC" data-tip-id="OMP_SERIAL_TIME_ISSUE"></span> of Elapsed Time<br></article><article data-id="OMP_IMBALANCE_TIME"><h3 data-id="OMP_IMBALANCE_TIME_TITLE" data-tip-id="OMP_IMBALANCE_TIME_DESCR" class="tooltipped">OpenMP Imbalance</h3><span data-id="OMP_IMBALANCE_TIME_VAL"></span><br><span data-id="OMP_IMBALANCE_TIME_PERC" data-tip-id="OMP_IMBALANCE_TIME_ISSUE"></span> of Elapsed Time<br></article><article data-id="CPU_USAGE"><h3 data-id="CPU_USAGE_TITLE" data-tip-id="CPU_USAGE_DESCR" class="tooltipped">CPU Utilization</h3><span data-id="CPU_USAGE_PERC" data-tip-id="CPU_USAGE_ISSUE"></span><div class="indent"><h4 data-id="AVERAGE_CPU_USE_DESCR" class="tooltipped">Average CPU Utilization</h4><span data-id="AVERAGE_CPU_USE_VAL"></span> Out of <span data-id="CPU_OUT_OF_VAL"></span> logical CPUs</div></article><article data-id="PHYSICAL_CORE_USAGE"><h3 data-id="PHYSICAL_CORE_USAGE_TITLE" data-tip-id="PHYSICAL_CORE_USAGE_DESCR" class="tooltipped">Physical Core Utilization</h3><span data-id="PHYSICAL_CORE_USAGE_PERC" data-tip-id="PHYSICAL_CORE_USAGE_ISSUE"></span><div class="indent"><h4 data-id="PH_CORE_USE_DESCR" class="tooltipped">Average Physical Core Utilization</h4><span data-id="PH_CORE_USE_VAL"></span> out of <span data-id="PH_CORE_OUT_OF_VAL"></span> physical cores</div></article><article data-id="MEMORY_BOUND"><h3 data-id="MEMORY_BOUND_TITLE" data-tip-id="MEMORY_BOUND_DESCR" class="tooltipped">Memory Stalls</h3><span data-id="MEMORY_BOUND_PERC" data-tip-id="MEMORY_BOUND_ISSUE"></span> of pipeline slots<div class="indent"><h4 data-id="CACHE_BOUND_TITLE" data-tip-id="CACHE_BOUND_DESCR" class="tooltipped">Cache Stalls</h4><span data-id="CACHE_BOUND_PERC" data-tip-id="CACHE_BOUND_ISSUE"></span><span data-id="CACHE_BOUND_EXTRA"> of cycles</span><h4 data-id="DRAM_BOUND_TITLE" data-tip-id="DRAM_BOUND_DESCR" class="tooltipped">DRAM Stalls</h4><span data-id="DRAM_BOUND_PERC" data-tip-id="DRAM_BOUND_ISSUE"></span> of cycles<h4 data-id="DRAM_BANDWIDTH_TITLE" data-tip-id="DRAM_BANDWIDTH_DESCR" class="tooltipped">DRAM Bandwidth</h4><span data-id="DRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="DRAM_BANDWIDTH_VAL" data-tip-id="DRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="DRAM_BANDWIDTH_UNIT"></span><h4 data-id="PERS_MEM_BOUND_TITLE" data-tip-id="PERS_MEM_BOUND_DESCR" class="tooltipped">Persistent Memory Stalls</h4><span data-id="PERS_MEM_BOUND_PERC" data-tip-id="PERS_MEM_BOUND_ISSUE"></span><h4 data-id="PERS_MEM_BANDWIDTH_TITLE" data-tip-id="PERS_MEM_BANDWIDTH_DESCR" class="tooltipped">Persistent Memory Bandwidth</h4><span data-id="PERS_MEM_BANDWIDTH_EXTRA">AVG </span><span data-id="PERS_MEM_BANDWIDTH_VAL" data-tip-id="PERS_MEM_BANDWIDTH_ISSUE"></span><span data-id="PERS_MEM_BANDWIDTH_UNIT"></span><h4 data-id="DRAM_HIT_RATIO_TITLE">DRAM Cache Hit Ratio</h4><span data-id="DRAM_HIT_RATIO_PERC"></span><div class="indent"><h4 data-id="DRAM_CACHE_HIT_TITLE">DRAM Cache Hits</h4><span data-id="DRAM_CACHE_HIT_VAL"></span><h4 data-id="DRAM_CACHE_MISS_TITLE">DRAM Cache Misses</h4><span data-id="DRAM_CACHE_MISS_VAL"></span></div><h4 data-id="MCDRAM_BANDWIDTH_TITLE" data-tip-id="MCDRAM_BANDWIDTH_DESCR" class="tooltipped">MCDRAM Bandwidth</h4><span data-id="MCDRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="MCDRAM_BANDWIDTH_VAL" data-tip-id="MCDRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="MCDRAM_BANDWIDTH_UNIT"></span><h4 data-id="NUMA_USE_TITLE" data-tip-id="NUMA_USE_DESCR" class="tooltipped">NUMA</h4><span data-id="NUMA_USE_PERC" data-tip-id="NUMA_USE_ISSUE"></span><span data-id="NUMA_USE_EXTRA"> of remote accesses</span></div></article><article data-id="BACK_END_BOUND"><h3 data-id="BACK_END_BOUND_TITLE" data-tip-id="BACK_END_BOUND_DESCR" class="tooltipped">Back-End Stalls</h3><span data-id="BACK_END_BOUND_PERC" data-tip-id="BACK_END_BOUND_ISSUE"></span> of pipeline slots<div class="indent"><h4 data-tip-id="L2_HIT_BOUND_DESCR" class="tooltipped">L2 Hit Bound</h4><span data-id="L2_HIT_BOUND_PERC" data-tip-id="L2_HIT_BOUND_ISSUE"></span> of cycles<h4 data-tip-id="L2_MISS_BOUND_DESCR" class="tooltipped">L2 Miss Bound</h4><span data-id="L2_MISS_BOUND_PERC" data-tip-id="L2_MISS_BOUND_ISSUE"></span> of cycles<h4 data-id="DRAM_BANDWIDTH_TITLE" data-tip-id="DRAM_BANDWIDTH_DESCR" class="tooltipped">DRAM Bandwidth</h4><span data-id="DRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="DRAM_BANDWIDTH_VAL" data-tip-id="DRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="DRAM_BANDWIDTH_UNIT"></span><h4 data-id="MCDRAM_BANDWIDTH_TITLE" data-tip-id="MCDRAM_BANDWIDTH_DESCR" class="tooltipped">MCDRAM Bandwidth</h4><span data-id="MCDRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="MCDRAM_BANDWIDTH_VAL" data-tip-id="MCDRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="MCDRAM_BANDWIDTH_UNIT"></span></div></article><article data-id="VECTORIZATION"><h3 data-id="VECTORIZATION_TITLE" data-tip-id="VECTORIZATION_DESCR" class="tooltipped">Vectorization</h3><span data-id="VECTORIZATION_PERC" data-tip-id="VECTORIZATION_ISSUE"></span><span data-id="VECTORIZATION_TXT">of Packed FP Operations</span><div class="indent"><h4 data-id="VECTORIZATION_DATA">Instruction Mix:</h4><h4 data-id="SP_FLOPS" data-tip-id="SP_FLOPS_DESCR" class="tooltipped">SP FLOPs</h4><span data-id="SP_FLOPS_DATA"><span data-id="SP_FLOPS_PERC" data-tip-id="SP_FLOPS_ISSUE"></span><span> of uOps</span></span><br><span data-id="SP_PACKED"><span data-tip-id="SP_PACKED_DESCR" class="tooltipped">Packed</span>: <span data-id="SP_PACKED_PERC"></span><span> from SP FP</span></span><div data-id="SP_FLOPS_BITS" class="indent"><span data-id="SP_128_BIT"><span data-tip-id="SP_128_BIT_DESCR" class="tooltipped">128-bit</span>: <span data-id="SP_128_BIT_PERC" data-tip-id="SP_128_BIT_ISSUE"></span></span><br><span data-id="SP_256_BIT"><span data-tip-id="SP_256_BIT_DESCR" class="tooltipped">256-bit</span>: <span data-id="SP_256_BIT_PERC" data-tip-id="SP_256_BIT_ISSUE"></span></span><br><span data-id="SP_512_BIT"><span data-tip-id="SP_512_BIT_DESCR" class="tooltipped">512-bit</span>: <span data-id="SP_512_BIT_PERC"></span></span></div><span data-id="SP_SCALAR"><span data-tip-id="SP_SCALAR_DESCR" class="tooltipped">Scalar</span>: <span data-id="SP_SCALAR_PERC" data-tip-id="SP_SCALAR_ISSUE"></span><span> from SP FP</span></span><h4 data-id="DP_FLOPS" data-tip-id="DP_FLOPS_DESCR" class="tooltipped">DP FLOPs</h4><span data-id="DP_FLOPS_DATA"><span data-id="DP_FLOPS_PERC" data-tip-id="DP_FLOPS_ISSUE"></span><span> of uOps</span></span><br><span data-id="DP_PACKED"><span data-tip-id="DP_PACKED_DESCR" class="tooltipped">Packed</span>: <span data-id="DP_PACKED_PERC"></span><span> from DP FP</span></span><div data-id="DP_FLOPS_BITS" class="indent"><span data-id="DP_128_BIT"><span data-tip-id="DP_128_BIT_DESCR" class="tooltipped">128-bit</span>: <span data-id="DP_128_BIT_PERC" data-tip-id="DP_128_BIT_ISSUE"></span></span><br><span data-id="DP_256_BIT"><span data-tip-id="DP_256_BIT_DESCR" class="tooltipped">256-bit</span>: <span data-id="DP_256_BIT_PERC" data-tip-id="DP_256_BIT_ISSUE"></span></span><br><span data-id="DP_512_BIT"><span data-tip-id="DP_512_BIT_DESCR" class="tooltipped">512-bit</span>: <span data-id="DP_512_BIT_PERC"></span></span></div><span data-id="DP_SCALAR"><span data-tip-id="DP_SCALAR_DESCR" class="tooltipped">Scalar</span>: <span data-id="DP_SCALAR_PERC" data-tip-id="DP_SCALAR_ISSUE"></span><span> from DP FP</span></span><h4 data-id="X87" data-tip-id="X87_DESCR" class="tooltipped">x87 FLOPs</h4><span data-id="X87_DATA"><span data-id="X87_PERC" data-tip-id="X87_ISSUE"></span><span> of uOps</span></span><h4 data-id="NON_FP" data-tip-id="NON_FP_DESCR" class="tooltipped">Non-FP</h4><span data-id="NON_FP_DATA"><span data-id="NON_FP_PERC" data-tip-id="NON_FP_ISSUE"></span><span> of uOps</span></span><h4 data-id="FP_ARITH_MEM_RD" data-tip-id="FP_ARITH_MEM_RD_DESCR" class="tooltipped">FP Arith/Mem Rd Instr. Ratio</h4><span data-id="FP_ARITH_MEM_RD_VAL" data-tip-id="FP_ARITH_MEM_RD_ISSUE"></span><h4 data-id="FP_ARITH_MEM_WR" data-tip-id="FP_ARITH_MEM_WR_DESCR" class="tooltipped">FP Arith/Mem Wr Instr. Ratio</h4><span data-id="FP_ARITH_MEM_WR_VAL" data-tip-id="FP_ARITH_MEM_WR_ISSUE"></span></div></article><article data-id="SIMD_PER_CYCLE"><h3 data-id="SIMD_PER_CYCLE_TITLE" data-tip-id="SIMD_PER_CYCLE_DESCR" class="tooltipped">SIMD Instr. per Cycle</h3><span data-id="SIMD_PER_CYCLE_VAL" data-tip-id="SIMD_PER_CYCLE_ISSUE"></span><div class="indent"><h4>FP Instruction Mix</h4><div class="indent">% of <span data-tip-id="PACKED_SIMD_DESCR" class="tooltipped">Packed SIMD Instr.</span>: <span data-id="PACKED_SIMD_PERC"></span><br>% of <span data-tip-id="SCALAR_SIMD_DESCR" class="tooltipped">Scalar SIMD Instr.</span>: <span data-id="SCALAR_SIMD_PERC" data-tip-id="SCALAR_SIMD_ISSUE"></span></div></div></article><article data-id="IO"><h3 data-id="IO_TITLE" data-tip-id="IOWAIT_USEC_DESCR" class="tooltipped">I/O Bound</h3><div data-id="IO_TEXT_BLOCK">These metrics are not available for Pcontrol.</div><div data-id="IO_DATA_BLOCK"><span data-id="IOWAIT_USEC_PERC"></span><br>(AVG <span data-id="IOWAIT_USEC_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="IOWAIT_USEC_MAX_RANK">PEAK</a> <span data-id="IOWAIT_USEC_MAX_VAL"></span>)<div class="indent"><h4 data-tip-id="READ_BYTES_DESCR" class="tooltipped">Read</h4>AVG <span data-id="READ_BYTES_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="READ_BYTES_MAX_RANK">MAX</a> <span data-id="READ_BYTES_MAX_VAL"></span><h4 data-tip-id="WRITTEN_BYTES_DESCR" class="tooltipped">Write</h4>AVG <span data-id="WRITTEN_BYTES_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="WRITTEN_BYTES_MAX_RANK">MAX</a> <span data-id="WRITTEN_BYTES_MAX_VAL"></span></div></div></article><article data-id="MEMORY"><h3 data-tip-id="MEM_DESCR" class="tooltipped">Memory Footprint</h3><div data-id="MEMORY_TEXT_BLOCK">These metrics are not available for Pcontrol.</div><div data-id="MEMORY_DATA_BLOCK"><div data-id="RSS_APP">Resident total: <span data-id="RSS_VAL"></span></div><table><tr><th>Resident</th><th>PEAK</th><th>AVG</th></tr><tr><td>Per node:</td><td><span data-id="PEAK_RSS_PER_NODE_VAL" class="tooltipped" data-tip-id="PEAK_RSS_NODE"></span></td><td><span data-id="MEAN_RSS_PER_NODE_VAL"></span></td></tr><tr><td>Per rank:</td><td class="cell"><span data-id="PEAK_RSS_VAL" class="tooltipped" data-tip-id="PEAK_RSS_RANK"></span></td><td class="cell"><span data-id="MEAN_RSS_VAL"></span></td></tr></table><div data-id="MEMORY_APP">Virtual total: <span data-id="MEM_VAL"></span></div><table><tr><th>Virtual</th><th>PEAK</th><th>AVG</th></tr><tr><td>Per node:</td><td><span data-id="PEAK_MEM_PER_NODE_VAL" class="tooltipped" data-tip-id="PEAK_MEM_NODE"></span></td><td><span data-id="MEAN_MEM_PER_NODE_VAL"></span></td></tr><tr><td>Per rank:</td><td class="cell"><span data-id="PEAK_MEM_VAL" class="tooltipped" data-tip-id="PEAK_MEM_RANK"></span></td><td class="cell"><span data-id="MEAN_MEM_VAL"></span></td></tr></table></div></article></div></div></div><div class="tip"><div data-id="current_run">Metric <b>value</b> collected during the application profiling run.</div><div data-id="target">Metric threshold used to indicate possible performance issues. Threshold values are fixed and may not accurately reflect the nature of your application.</div><div data-id="delta">Visual representation of the current run value compared to the target threshold. The Delta is set to zero if the current run value is within the target threshold.</div><div data-id="WALLCLOCK_DESCR"></div><div data-id="SPGFLOPS_DESCR"></div><div data-id="DPGFLOPS_DESCR"></div><div data-id="X87GFLOPS_DESCR"></div><div data-id="AVG_FREQUENCY_DESCR"></div><div data-id="CPI_RATE_MIN_NODE"></div><div data-id="CPI_RATE_MAX_NODE"></div><div data-id="CPI_RATE_DESCR"></div><div data-id="CPI_RATE_ISSUE"></div><div data-id="MEM_DESCR"></div><div data-id="RSS_DESCR"></div><div data-id="MEMORY_BOUND_MIN_NODE"></div><div data-id="MEMORY_BOUND_MAX_NODE"></div><div data-id="READ_BYTES_MAX_RANK"></div><div data-id="WRITTEN_BYTES_MAX_RANK"></div><div data-id="IOWAIT_USEC_MAX_RANK"></div><div data-id="IOWAIT_USEC_DESCR"></div><div data-id="PEAK_MEM_RANK"></div><div data-id="PEAK_MEM_NODE"></div><div data-id="PEAK_RSS_RANK"></div><div data-id="PEAK_RSS_NODE"></div><div data-id="READ_BYTES_DESCR"></div><div data-id="WRITTEN_BYTES_DESCR"></div><div data-id="MPI_TIME_DESCR"></div><div data-id="MPI_TIME_ISSUE"></div><div data-id="MPI_IMBALANCE_DESCR"></div><div data-id="MPI_IMBALANCE_ISSUE"></div><div data-id="OMP_IMBALANCE_TIME_ISSUE"></div><div data-id="OMP_IMBALANCE_TIME_DESCR"></div><div data-id="OMP_SERIAL_TIME_ISSUE"></div><div data-id="OMP_SERIAL_TIME_DESCR"></div><div data-id="MEMORY_BOUND_DESCR"></div><div data-id="MEMORY_BOUND_ISSUE"></div><div data-id="VECTORIZATION_DESCR"></div><div data-id="VECTORIZATION_ISSUE"></div><div data-id="FLOPS_PER_CYCLE_DESCR"></div><div data-id="FLOPS_PER_CYCLE_ISSUE"></div><div data-id="X87_DESCR"></div><div data-id="X87_ISSUE"></div><div data-id="NON_FP_DESCR"></div><div data-id="NON_FP_ISSUE"></div><div data-id="DP_FLOPS_DESCR"></div><div data-id="DP_PACKED_DESCR"></div><div data-id="DP_128_BIT_DESCR"></div><div data-id="DP_128_BIT_ISSUE"></div><div data-id="DP_256_BIT_DESCR"></div><div data-id="DP_256_BIT_ISSUE"></div><div data-id="DP_512_BIT_DESCR"></div><div data-id="DP_SCALAR_DESCR"></div><div data-id="DP_SCALAR_ISSUE"></div><div data-id="SP_FLOPS_DESCR"></div><div data-id="SP_PACKED_DESCR"></div><div data-id="SP_128_BIT_DESCR"></div><div data-id="SP_128_BIT_ISSUE"></div><div data-id="SP_256_BIT_DESCR"></div><div data-id="SP_256_BIT_ISSUE"></div><div data-id="SP_512_BIT_DESCR"></div><div data-id="SP_SCALAR_DESCR"></div><div data-id="SP_SCALAR_ISSUE"></div><div data-id="FP_ARITH_MEM_RD_DESCR"></div><div data-id="FP_ARITH_MEM_RD_ISSUE"></div><div data-id="FP_ARITH_MEM_WR_DESCR"></div><div data-id="FP_ARITH_MEM_WR_ISSUE"></div><div data-id="CACHE_BOUND_DESCR"></div><div data-id="CACHE_BOUND_ISSUE"></div><div data-id="DRAM_BOUND_DESCR"></div><div data-id="DRAM_BOUND_ISSUE"></div><div data-id="PERS_MEM_BOUND_DESCR"></div><div data-id="PERS_MEM_BOUND_ISSUE"></div><div data-id="NUMA_USE_DESCR"></div><div data-id="NUMA_USE_ISSUE"></div><div data-id="CPU_USAGE_DESCR"></div><div data-id="CPU_USAGE_ISSUE"></div><div data-id="PHYSICAL_CORE_USAGE_DESCR"></div><div data-id="PHYSICAL_CORE_USAGE_ISSUE"></div><div data-id="AVERAGE_CPU_USE_DESCR"></div><div data-id="BACK_END_BOUND_DESCR"></div><div data-id="BACK_END_BOUND_ISSUE"></div><div data-id="L2_HIT_BOUND_DESCR"></div><div data-id="L2_MISS_BOUND_DESCR"></div><div data-id="L2_HIT_BOUND_ISSUE"></div><div data-id="L2_MISS_BOUND_ISSUE"></div><div data-id="SIMD_PER_CYCLE_DESCR"></div><div data-id="SIMD_PER_CYCLE_ISSUE"></div><div data-id="PACKED_SIMD_DESCR"></div><div data-id="SCALAR_SIMD_DESCR"></div><div data-id="SCALAR_SIMD_ISSUE"></div><div data-id="FLOPS_PER_CYCLE_ISSUE"></div><div data-id="DRAM_BANDWIDTH_DESCR">Average amount of data transferred through DRAM memory controller per second.</div><div data-id="MCDRAM_BANDWIDTH_DESCR">Average amount of data transferred through MCDRAM memory controller per second.</div><div data-id="DRAM_BANDWIDTH_VAL_DESCR">Data for this metric is not collected since it requires system-wide performance monitoring. Make sure the sampling driver is properly installed on your system: <a href="https://software.intel.com/en-us/vtune-amplifier-help-sep-driver">https://software.intel.com/en-us/vtune-amplifier-help-sep-driver</a>. Otherwise, enable a driverless Perf-based sampling collection by setting the /proc/sys/kernel/perf_even_paranoid value to 0 or less.</div><div data-id="MCDRAM_BANDWIDTH_VAL_DESCR">Data for this metric is not collected since it requires system-wide performance monitoring. Make sure the sampling driver is properly installed on your system: <a href="https://software.intel.com/en-us/vtune-amplifier-help-sep-driver">https://software.intel.com/en-us/vtune-amplifier-help-sep-driver</a>. Otherwise, enable a driverless Perf-based sampling collection by setting the /proc/sys/kernel/perf_even_paranoid value to 0 or less.</div><div data-id="PERS_MEM_BANDWIDTH_DESCR"></div><div data-id="MCDRAM_BANDWIDTH_ISSUE"></div><div data-id="DRAM_BANDWIDTH_ISSUE"></div><div data-id="PERS_MEM_BANDWIDTH_ISSUE"></div><div data-id="BANDWIDTH_AVG_OUT_DESCR"></div><div data-id="BANDWIDTH_AVG_IN_DESCR"></div><div data-id="BANDWIDTH_AVG_OUT_ISSUE"></div><div data-id="BANDWIDTH_AVG_IN_ISSUE"></div><div data-id="PACKRATE_AVG_OUT_DESCR"></div><div data-id="PACKRATE_AVG_IN_DESCR"></div><div data-id="PACKRATE_AVG_OUT_ISSUE"></div><div data-id="PACKRATE_AVG_IN_ISSUE"></div><div data-id="MPI_PERC_DESCR">Percentage from Elapsed Time</div></div><div class="not-supported-browser">Intel<sup>®</sup> MPI Performance Snapshot report cannot be opened with the current browser. Use any of these supported browsers:<ul><li>Mozilla Firefox* version 17 or later</li><li>Safari* version 8 or later</li><li>Microsoft Internet Explorer* version 11 or later</li><li>Microsoft Edge* version 12 or later</li><li>Google Chrome* version 40 or later</li></ul></div></body></html> \ No newline at end of file +<!DOCTYPE html><html lang="en"><head><title>APS report</title><link rel="icon" href="favicon.ico" type="image/x-icon"><meta name="nexus-supported-models" content="mps_plugin.Result"><link rel="nexus-home" href="?nexus={nexus}"><link rel="nexus-app" href="?nexus={nexus}&path={properties.path}"><meta name="description" content="APS viewer application"><style type="text/css">body,html{height:100%}body{margin:0;font-family:'Segoe UI',Tahoma,Geneva,Verdana,sans-serif;font-size:15px;color:#000;background-color:#f7f7f7;overflow:hidden}@media (min-width:1320px){#content{width:1190px;font-size:16.5px}}@media (min-width:1540px){#content{width:1410px;font-size:18px}}@media (min-width:1760px){#content{width:1630px;font-size:19.5px}}.ablate{display:none}#leftMenuOpenedState,#topMenuOpenedState{background-color:#00aeef;padding:.5em;color:#fff}#leftMenuOpenedState>h4,#topMenuOpenedState>h4{font-weight:700;margin-bottom:.8em}#leftMenuOpenedState>div,#leftMenuOpenedState>span,#topMenuOpenedState>div,#topMenuOpenedState>span{opacity:.75;padding-bottom:.8em;font-style:italic}#leftMenuOpenedState>a,#leftMenuOpenedState>a:visited,#topMenuOpenedState>a,#topMenuOpenedState>a:visited{font-weight:400;color:#fff;opacity:.9}#leftMenuOpenedState>a:hover,#topMenuOpenedState>a:hover{opacity:1}#topMenuOpenedState>a{padding-top:.6em;padding-left:4em}#leftMenuClosedState,#topMenuClosedState{background-color:#0071c5}#leftMenuClosedState:hover,#topMenuClosedState:hover{cursor:pointer;background-color:#00aeef}#topMenuClosedState{font-size:22.5px;line-height:60px;color:#fff;text-overflow:ellipsis;white-space:nowrap;overflow:hidden}#topMenuClosedState sup{display:none;font-size:.4em;font-weight:400;line-height:.8em}#topMenuClosedState span{padding-left:13px;padding-right:13px}#topMenuClosedState span.icon.menu:after{vertical-align:middle}#topMenuClosedState span.icon.intel{line-height:0;float:right;font-size:2em;padding-top:13px}@media (min-width:625px){#topMenuClosedState sup{display:inline}}#leftMenuClosedState div.icon{font-size:2.5em;width:1em;margin:15px auto}#leftMenuClosedState div.icon.intel{font-size:4em;position:absolute;margin:0;bottom:0;left:15px}#leftMenuClosedState,#leftMenuOpenedState{z-index:5;position:fixed}#topMenuClosedState,#topMenuOpenedState{z-index:5}#leftMenuClosedState,#leftMenuOpenedState,.feedback-panel.closed a.button{display:none}#topMenuOpenedState{display:block;position:fixed;left:0;right:0;top:-200px;bottom:0;width:100%;height:200px}#topMenuClosedState{display:block;position:absolute;left:0;right:0;top:0;height:60px}#contentWrap{position:fixed;overflow-y:auto;overflow-x:hidden;left:0;right:0;top:0;bottom:0;padding:20px;margin-top:60px}.menuOpened #topMenuOpenedState{top:0}.menuOpened #topMenuClosedState{top:-60px}.menuOpened #contentWrap{opacity:.35;margin-top:200px}@media (min-width:850px){#topMenuClosedState,#topMenuOpenedState{display:none}#leftMenuClosedState,#leftMenuOpenedState{display:block;left:-180px;top:0;bottom:0;width:180px}#leftMenuClosedState{left:0;width:90px}#contentWrap{left:0;top:0;padding-left:110px;margin-top:0}.menuOpened #leftMenuOpenedState{left:0}.menuOpened #leftMenuClosedState{left:-90px}.menuOpened #contentWrap{left:135px;margin-top:0}}.anim{-webkit-transition:.5s all cubic-bezier(.8,0,.2,1);-moz-transition:.5s all cubic-bezier(.8,0,.2,1);-o-transition:.5s all cubic-bezier(.8,0,.2,1);transition:.5s all cubic-bezier(.8,0,.2,1)}@media print{#leftMenuClosedState,#leftMenuOpenedState,#topMenuClosedState,#topMenuOpenedState{display:none}#contentWrap{position:relative}}#content{margin:0 auto;width:100%;max-height:0;overflow:hidden}#content:after{content:"Screen is too small";position:absolute;top:133.33333333px;left:0;width:100%;margin:auto;text-align:center;opacity:.5}@media (min-width:400px){#content{max-height:inherit}#content:after{content:""}}#bricks{clear:both}#bricks>article{background-color:#fff}.not-supported-browser{color:#000;background-color:#fff;padding:30px;height:100%}.feedback-panel{position:absolute;color:#fff;background-color:#0071c5;font-weight:400;padding:10px;width:200px;height:70px;transition:left ease-in-out .5s 2s,top ease-in-out .5s 2s}.feedback-panel:after{content:"";position:absolute;width:0;height:0}.feedback-panel.left{left:0;bottom:30px}.feedback-panel.left:after{left:220px;top:0;border:45px solid transparent;border-left-width:20px;border-left-color:#0071c5;border-right:0}.feedback-panel.top{right:30px;top:140px}.feedback-panel.top:after{left:100px;top:90px;border:60px solid transparent;border-top-width:20px;border-top-color:#0071c5;border-bottom-width:0}.feedback-panel.closed.left{left:-240px}.feedback-panel.closed.top{top:-110px}.feedback-panel .question{margin-bottom:.8em}.feedback-panel a.button{display:inline-block;color:#fff;padding:5px 10px;margin-bottom:5px;text-decoration:none;background-color:rgba(255,255,255,.2);border-radius:5px}.feedback-panel a.button:hover{background-color:rgba(255,255,255,.3)}.icon:after{display:inline-block;width:1em;height:1em;line-height:1em;vertical-align:bottom}.icon.menu:after{content:url(data:image/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20viewBox%3D%270%200%2020%2020%27%20fill%3D%27%23FFF%27%3E%3Crect%20x%3D%273%27%20y%3D%273%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3Crect%20x%3D%273%27%20y%3D%2713%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3Crect%20x%3D%273%27%20y%3D%278%27%20width%3D%2714%27%20height%3D%273%27%2F%3E%3C%2Fsvg%3E)}.icon.email:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpath fill='white' d='M10,0L2,6v9.5h16V6L10,0z M10,1.5L16,6l-6,3.2L4,6L10,1.5z M16.5,14.2h-13v-7l6.5,3.3L16.5,7V14.2z'/%3E%3C/svg%3E")}.icon.intel:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 293 293' fill='white'%3E%3Cpath d='M 291.28737,55.058565 C 277.50683,-12.11587 147.525,-16.369963 63.737895,34.808098 L 63.737895,40.460487 C 147.41497,-2.7316788 266.14859,-2.4562338 276.95315,59.422033 C 280.59385,79.920851 269.12707,101.24048 248.56679,113.50757 L 248.56679,129.56114 C 273.31695,120.47998 298.61629,91.08816 291.28737,55.058565 M 138.92617,172.67053 C 81.102616,178.02238 20.853265,169.59848 12.418485,124.23687 C 8.2296052,101.90025 18.426965,78.192925 31.877897,63.483967 L 31.877897,55.608451 C 7.6234054,76.957185 -5.5514356,103.96031 2.0555709,135.84419 C 11.756767,176.75954 63.462304,199.91949 142.39931,192.20804 C 173.65361,189.19069 214.55609,179.09205 242.94244,163.42581 L 242.94244,141.16845 C 217.14544,156.61695 174.47938,169.38024 138.92617,172.67053 z'/%3E%3Cpath d='M 238.31142,45.347552 L 223.15342,45.347552 L 223.15342,113.16 C 223.15342,121.1244 226.95767,128.05143 238.31142,129.14959'/%3E%3Cpath d='M 57.729915,70.130433 L 42.57092,70.130433 L 42.57092,114.42193 C 42.57092,122.38934 46.375174,129.31386 57.729915,130.41152'/%3E%3Cpath d='M 188.42548,81.588742 C 183.30029,81.588742 179.33248,84.253255 177.67794,87.85316 C 176.68611,90.022849 176.3545,91.672094 176.19095,94.338616 L 199.39759,94.338616 C 199.06648,87.82553 196.14302,81.588742 188.42548,81.588742 M 176.19095,104.61387 C 176.19095,112.33513 181.03904,118.01982 189.52834,118.01982 C 196.19954,118.01982 199.50663,116.15155 203.3654,112.33563 L 212.62645,121.26456 C 206.67299,127.14267 200.44444,130.71544 189.41981,130.71544 C 175.03357,130.71544 161.25152,122.82991 161.25152,99.85904 C 161.25152,80.21479 173.26798,69.115166 189.0887,69.115166 C 205.12948,69.115166 214.33351,82.110693 214.33351,99.173822 L 214.33351,104.61437 L 176.19095,104.61437'/%3E%3Cpath d='M 98.576374,82.329722 C 102.98533,82.329722 104.80493,84.500918 104.80493,88.045061 L 104.80493,129.78055 L 119.85388,129.78055 L 119.85388,87.990304 C 119.85388,79.499933 115.33339,70.129931 102.15955,70.129931 L 71.125329,70.129931 L 71.125329,129.78005 L 86.118769,129.78005 L 86.118769,82.329219'/%3E%3Cpath d='M 148.57135,129.69867 C 136.2803,129.69867 131.09859,121.12491 131.09859,112.66317 L 131.09859,53.837923 L 146.09203,53.837923 L 146.09203,70.130433 L 157.44627,70.130433 L 157.44627,82.329722 L 146.09203,82.329722 L 146.09203,111.75641 C 146.09203,115.21716 147.74557,117.11508 151.32725,117.11508 L 157.44627,117.11508 L 157.44627,129.69867 L 148.57135,129.69867'/%3E%3Crect x='42.570919' y='47.40823' width='15.158995' height='14.425744'/%3E%3Ctext x='244.26051' y='55.85825'%3ER%3C/text%3E%3C/svg%3E")}.icon.flag:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='%23BE3B26' points='4.5,0 11,3 10,6 13,11 7,9 7,7.5 4.5,7 4.5,15.5 3,15.5 3,0'/%3E%3C/svg%3E")}.icon.cross:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='%230071C5' points='1,17 3,19 10,12 17,19 19,17 12,10 19,3 17,1 10,8 3,1 1,3 8,10'/%3E%3C/svg%3E")}.icon.arrow-s-w:after{content:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3E%3Cpolygon fill='grey' points='19,3 17,1 5.5,12.5 1,8 1,19 12,19 7.5,14.5'/%3E%3C/svg%3E")}.tooltipped[data-tip-id]:not(.icon){cursor:help;border-bottom-width:1px;border-bottom-style:dashed}h3.tooltipped[data-tip-id],h4.tooltipped[data-tip-id]{display:table}.tip>div[data-id]{position:fixed;padding:.75em;display:none;max-width:35%;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box;background-color:rgba(255,255,255,.9);border:0 solid #000;box-shadow:rgba(0,0,0,.25) 0 5px 8px;z-index:10}.tip>div[data-id]:after{position:absolute;content:"";height:0;border:5px solid transparent}.tip>div[data-id].north{border-bottom-width:5px}.tip>div[data-id].north:after{bottom:-16px;border-top:6px solid #000}.tip>div[data-id].south{border-top-width:5px}.tip>div[data-id].south:after{top:-16px;border-bottom:6px solid #000}.tip>div[data-id].west:after{left:15px}.tip>div[data-id].east:after{right:1px}.tip>div[data-id].visible{display:block}.tip>div[data-id$=_ISSUE]{border-color:#bf3600}.tip>div[data-id$=_ISSUE].north:after{border-top-color:#bf3600}.tip>div[data-id$=_ISSUE].south:after{border-bottom-color:#bf3600}@media (min-width:650px){.tip>div{max-width:30%}}@media (min-width:850px){.tip>div{max-width:25%}}@media (min-width:1100px){.tip>div{max-width:20%}}body .icon.flag{color:inherit;transition:color .5s ease .5s}body .icon.flag:after{width:0;transition:width .5s ease .5s}body.withSuggestions .icon.flag{color:#bf3600}body.withSuggestions .icon.flag:after{width:1em}body article#suggestion{position:relative;padding:1.5em;margin-bottom:2.5em;margin-left:1.5em;border:2px solid gray;border-radius:4px;width:0%;max-height:0;opacity:.5;float:right;background-color:#fff;transition:max-height .75s ease 0s,width .5s ease .25s}body article#suggestion::after{content:"";position:absolute;width:0;height:0;left:10px;bottom:-10px;border-bottom:10px solid transparent;border-right:25px solid gray;transition:left .5s ease 0s,bottom .5s ease 0s,border-bottom-width .5s ease 0s,border-right-width .5s ease 0s,border-color .5s ease 0s}body article#suggestion section.aspect_map,body article#suggestion section[data-id=SUGGESTION_TEXT]{opacity:0;overflow:hidden;transition:opacity .5s ease 0s}body.withSuggestions article#suggestion{opacity:1;width:85%;max-height:700px;border-color:#0071c5;transition:max-height .5s ease .25s,width .5s ease 0s,border-color .5s ease 0s}body.withSuggestions article#suggestion::after{left:20px;bottom:-20px;border-bottom-width:20px;border-right-width:80px;border-right-color:#0071c5}body.withSuggestions article#suggestion section.aspect_map,body.withSuggestions article#suggestion section[data-id=SUGGESTION_TEXT]{opacity:1;transition:opacity .25s ease .5s}@media (min-width:625px){body.withSuggestions article#suggestion{width:50%}}a.button.suggestion{position:absolute;right:.9em;top:.75em}section[data-id=SUGGESTION_TEXT]{margin-bottom:1.5em}section[data-id=SUGGESTION_TEXT] span#first_line{font-size:150%;color:#0071c5}section.aspect_map{width:100%;display:table}section.aspect_map>section.aspect{display:table-row;height:1.6em}section.aspect_map>section.aspect>span{display:table-cell;vertical-align:middle;white-space:nowrap;padding-left:.5em}section.aspect_map>section.aspect>span:first-child{display:inline-block;padding-left:0}section.aspect_map>section.aspect>span:nth-child(2){text-align:right}section.aspect_map>section.aspect>span:not(.icon):nth-child(2){padding-right:1em}section.aspect_map>section.aspect>span:nth-child(3){padding-right:1em}section.aspect_map>section.aspect>span:last-child{position:relative;width:60%}section.aspect_map>section.aspect>span:last-child>span:not([data-tip-id]){position:absolute;left:0;right:100%;top:.5em;height:.6em;background-color:#bf3600}section.aspect_map>section.aspect:first-child{color:gray;text-align:center;font-size:.75em}article[data-id=INFO]{color:gray;margin-bottom:20px;padding-right:2em;font-size:80%}article[data-id=INFO] span[data-id]:last-child{font-style:italic;word-wrap:break-word;word-break:break-all;padding-left:.5em;color:#000}h1{font-size:2.5em;color:rgba(128,128,128,.5);margin:0 0 20px;display:none}h1 sup{display:block;font-size:.4em;font-weight:400;line-height:.8em}@media (min-width:850px){h1{display:block}}.aux-metric,.top-metric{display:inline-block;font-size:1em;font-weight:400;margin-bottom:1.5em}.aux-metric [data-id]:first-child,.top-metric [data-id]:first-child{display:block;padding-top:.75em;font-size:2.2em;font-weight:500;color:#00aeef}.top-metric{padding-right:10%}.aux-metric{padding-right:2em}.aux-metric:first-child{border-left:1px solid #00aeef}.aux-metric:last-child{border-right:1px solid #00aeef}div#content article[data-id=WALLCLOCK]>span[data-id]:first-child{font-size:3.4em;padding-top:0}div#bricks article{color:gray}div#bricks article h3{margin:0;font-size:1.5em;font-weight:400;color:#00aeef}div#bricks article h4{margin:.8em 0 0;padding:0;font-size:1em;font-weight:400;color:#000}div#bricks article .indent{margin-left:1.75em}div#bricks article table{margin-top:.8em;width:100%;border-collapse:collapse}div#bricks article table tr{border-bottom:1px solid rgba(128,128,128,.25)}div#bricks article table tr td,div#bricks article table tr th{text-align:right}div#bricks article table tr td:first-child,div#bricks article table tr th:first-child{text-align:left;white-space:normal}div#bricks article table tr td{padding:.2em;position:relative;overflow:hidden;white-space:nowrap}div#bricks article table tr th{font-weight:400;padding:.2em 0;border-bottom:2px solid rgba(128,128,128,.25)}div#bricks article table tr th:first-child{color:#000}div#bricks article table tr .progress{position:absolute;width:0%;height:100%;background:#00aeef;opacity:.15;left:0;top:0;-webkit-animation:filler 4s ease-in-out;-moz-animation:filler 4s ease-in-out;animation:filler 4s ease-in-out}div[data-id=MEMORY_INFO]>div,div[data-id=RSS_INFO]>div{margin-left:1.5em}div[data-id=MEMORY_INFO]>span,div[data-id=RSS_INFO]>span{margin-left:1em}</style><script>var json={"INFO":{"APPLICATION_NAME":"swift_mpi","NUM_RANKS":"2","RANKS_PER_NODE":"1","STAT_FILES":"/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples/HydroTests/SodShock_3D/aps_result_20210121","CREATION_DATETIME":"2021-01-21 16:07:46"},"VECTORIZATION":null,"SP_FLOPS":null,"SP_PACKED":null,"SP_128_BIT":null,"SP_256_BIT":null,"SP_512_BIT":null,"SP_SCALAR":null,"DP_FLOPS":null,"DP_PACKED":null,"DP_128_BIT":null,"DP_256_BIT":null,"DP_512_BIT":null,"DP_SCALAR":null,"X87":null,"AVG_FREQUENCY":null,"NON_FP":null,"FP_ARITH_MEM_RD":null,"FP_ARITH_MEM_WR":null,"CACHE_BOUND":null,"DRAM_BOUND":null,"DRAM_BANDWIDTH":null,"PERS_MEM_BOUND":null,"PERS_MEM_BANDWIDTH":null,"MCDRAM_BANDWIDTH":null,"DRAM_HIT_RATIO":null,"DRAM_CACHE_HIT":null,"DRAM_CACHE_MISS":null,"NUMA_USE":null,"BANDWIDTH_AVG_OUT":null,"BANDWIDTH_AVG_IN":null,"PACKRATE_AVG_OUT":null,"PACKRATE_AVG_IN":null,"CPU_USAGE":null,"AVERAGE_CPU_USE":null,"CPU_OUT_OF":null,"PHYSICAL_CORE_USAGE":null,"PH_CORE_USE":null,"PH_CORE_OUT_OF":null,"BACK_END_BOUND":null,"L2_HIT_BOUND":null,"L2_MISS_BOUND":null,"SIMD_PER_CYCLE":null,"PACKED_SIMD":null,"SCALAR_SIMD":null,"CPI_RATE":null,"SPGFLOPS":null,"DPGFLOPS":null,"X87GFLOPS":null,"WALLCLOCK":{"WALLCLOCK_SEVERITY":"severityUnknown","WALLCLOCK_VAL":"43.90s","WALLCLOCK_PERC":"0.00%","WALLCLOCK_DESCR":"Total application wall-time."},"MPI_TIME":{"MPI_TIME_SEVERITY":"severityCritical","MPI_TIME_VAL":"110.04s","MPI_TIME_PERC":"250.72%","MPI_TIME_DESCR":"Time spent inside the MPI library. Values more than 10% might need additional exploration on MPI communication efficiency. This might be caused by high wait times inside the library, active communications, non-optimal settings of the MPI library. See MPI Imbalance metric to see if the application has load balancing problem. Use <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore communication efficiency.","MPI_TIME_ISSUE":"Your application is MPI bound. This may be caused by high busy wait time inside the library (imbalance), non-optimal communication schema or MPI library settings. Explore the MPI Imbalance metric if it is available or use <a href='https://software.intel.com/en-us/get-started-with-itac' target='_blank'>MPI profiling tools</a> like <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore possible performance bottlenecks."},"MPI_IMBALANCE":{"MPI_IMBALANCE_SEVERITY":"severityUnknown","MPI_IMBALANCE_VAL":"4.24s","MPI_IMBALANCE_PERC":"9.67%","MPI_IMBALANCE_DESCR":"Mean unproductive wait time per process spent in the MPI library calls when a process is waiting for data."},"NON_MPI_TIME":null,"OMP_REGIONS_TIME":null,"OMP_IMBALANCE_TIME":null,"OMP_SERIAL_TIME":null,"MEMORY":{"PEAK_MEM_RANK":"Rank: 1","PEAK_MEM_VAL":"940.50 MB","MEAN_MEM_VAL":"931.79 MB","MEM_DESCR":"Per-process memory usage affects the application scalability."},"RSS":{"PEAK_RSS_RANK":"Rank: 1","PEAK_RSS_VAL":"543.75 MB","MEAN_RSS_VAL":"537.50 MB","RSS_DESCR":"RSS usage affects the application scalability."},"MEMORY_PER_NODE":{"PEAK_MEM_NODE":"Node: b106.pri.cosma7.alces.network","PEAK_MEM_PER_NODE_VAL":"940.50 MB","MEAN_MEM_PER_NODE_VAL":"931.79 MB","MEM_PER_NODE_DESCR":"Per-node memory usage affects the application scalability."},"RSS_PER_NODE":{"PEAK_RSS_NODE":"Node: b106.pri.cosma7.alces.network","PEAK_RSS_PER_NODE_VAL":"543.75 MB","MEAN_RSS_PER_NODE_VAL":"537.50 MB","RSS_PER_NODE_DESCR":"Per-node RSS usage affects the application scalability."},"TOP_5":{"Test":"127.23","Isend":"79.80","Irecv":"32.05","Issend":"5.63","Waitall":"2.29"},"MEMORY_BOUND":null,"IO":{"READ_BYTES_SEVERITY":"severityUnknown","READ_BYTES_DESCR":"Amount of bytes read by the application. High volumes of I/O operations may lead to unproductive wait.","WRITTEN_BYTES_SEVERITY":"severityUnknown","WRITTEN_BYTES_DESCR":"Amount of bytes written by the application. High volumes of I/O operations may lead to unproductive wait.","IOWAIT_USEC_SEVERITY":"severityMedium","IOWAIT_USEC_VAL":"0.07s","IOWAIT_USEC_PERC":"0.15%","IOWAIT_USEC_DESCR":"This is the time the application spends waiting for an I/O operation to complete.","READ_BYTES_AVG_VAL":" 8.4 GB","READ_BYTES_MAX_VAL":" 16.8 GB","READ_BYTES_MAX_RANK":"Rank: 1","WRITTEN_BYTES_AVG_VAL":" 8.1 GB","WRITTEN_BYTES_MAX_VAL":" 16.2 GB","WRITTEN_BYTES_MAX_RANK":"Rank: 1","IOWAIT_USEC_AVG_VAL":"0.07","IOWAIT_USEC_AVG_PERC":"0.15%","IOWAIT_USEC_MAX_VAL":"0.13","IOWAIT_USEC_MAX_PERC":"0.30%","IOWAIT_USEC_MAX_RANK":"Rank: 1"},"SUGGESTION":{"TEXT":"<span id='first_line'>Your application is MPI bound.</span><br> This may be caused by high busy wait time inside the library (imbalance), non-optimal communication schema or MPI library settings. Use <a href='https://software.intel.com/en-us/get-started-with-itac' target='_blank'>MPI profiling tools</a> like <a href='https://software.intel.com/en-us/intel-trace-analyzer' target='_blank'>Intel® Trace Analyzer and Collector</a> to explore performance bottlenecks.","STYLE":"bad"}};</script><script>function drawProgress(){for(var e=document.querySelectorAll(".progress"),t=0;t<e.length;t++)e[t].style.width=e[t].dataset.progress+"%"}function drawThresholdBars(e){var t={BACK_END_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},BANDWIDTH_AVG_OUT:{threshold:8.75,lessIsBetter:!0,perc:!1},BANDWIDTH_AVG_IN:{threshold:8.75,lessIsBetter:!0,perc:!1},CACHE_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},CPI_RATE:{threshold:1,lessIsBetter:!0,perc:!1},CPU_USAGE:{threshold:90,lessIsBetter:!1,perc:!0},DP_128_BIT:{threshold:5,lessIsBetter:!0,perc:!0},DP_256_BIT:{threshold:5,lessIsBetter:!0,perc:!0},DP_SCALAR:{threshold:30,lessIsBetter:!0,perc:!0},DRAM_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},FP_ARITH_MEM_RD:{threshold:.5,lessIsBetter:!1,perc:!1},FP_ARITH_MEM_WR:{threshold:.5,lessIsBetter:!1,perc:!1},IO:{threshold:10,lessIsBetter:!0,perc:!0},L2_HIT_BOUND:{threshold:10,lessIsBetter:!0,perc:!0},L2_MISS_BOUND:{threshold:15,lessIsBetter:!0,perc:!0},MEMORY_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},MPI_IMBALANCE:{threshold:30,lessIsBetter:!0,perc:!1},MPI_TIME:{threshold:10,lessIsBetter:!0,perc:!0},NUMA_USE:{threshold:15,lessIsBetter:!0,perc:!0},OMP_IMBALANCE_TIME:{threshold:10,lessIsBetter:!0,perc:!0},OMP_SERIAL_TIME:{threshold:15,lessIsBetter:!0,perc:!0},PACKRATE_AVG_IN:{threshold:105,lessIsBetter:!0,perc:!1},PACKRATE_AVG_OUT:{threshold:105,lessIsBetter:!0,perc:!1},PERS_MEM_BANDWIDTH:{threshold:42,lessIsBetter:!0,perc:!1},PERS_MEM_BOUND:{threshold:20,lessIsBetter:!0,perc:!0},PHYSICAL_CORE_USAGE:{threshold:80,lessIsBetter:!1,perc:!0},SCALAR_FP:{threshold:15,lessIsBetter:!0,perc:!0},SCALAR_SIMD:{threshold:15,lessIsBetter:!0,perc:!0},SIMD_PER_CYCLE:{threshold:1,lessIsBetter:!1,perc:!1},SP_128_BIT:{threshold:5,lessIsBetter:!0,perc:!0},SP_256_BIT:{threshold:5,lessIsBetter:!0,perc:!0},SP_SCALAR:{threshold:30,lessIsBetter:!0,perc:!0},VECTORIZATION:{threshold:70,lessIsBetter:!1,perc:!0}};null!=e.BACK_END_BOUND&&(t.CPI_RATE.threshold=2);var s,r,o,n=["BACK_END_BOUND","CPU_USAGE","IO","MEMORY_BOUND","MPI_TIME","OMP_IMBALANCE_TIME","OMP_SERIAL_TIME","PHYSICAL_CORE_USAGE","SIMD_PER_CYCLE","VECTORIZATION"];for(var l in t)if(null!=e[l]){var i=-1<n.indexOf(l);if(o=t[l].perc?"_PERC":"_VAL",s=parseFloat(e[l][l+o]),"MPI_IMBALANCE"==l){var a=parseFloat(e.MPI_TIME.MPI_TIME_VAL),d=parseFloat(e.WALLCLOCK.WALLCLOCK_VAL);s=0<a?100*s/a:0,0<d&&100*a/d<=15&&(t[l].threshold=100),o="_PERC"}if(null!=(r=i?document.querySelector("[data-id="+l+"_BAR]"):document.querySelector("[data-id="+l+o+"]"))&&(t[l].lessIsBetter&&s>=t[l].threshold||!t[l].lessIsBetter&&s<=t[l].threshold)){if("SP_256_BIT"==l&&null==e.SP_512_BIT||"DP_256_BIT"==l&&null==e.DP_512_BIT)continue;if(i&&(r.style.right=95-95/90*Math.abs(s-t[l].threshold)+"%"),null!=(r=document.querySelectorAll("[data-id="+l+o+"]")))for(var u=0;u<r.length;u++)r[u].classList.add("icon"),r[u].classList.add("flag"),null!=e[l][l+"_ISSUE"]&&r[u].classList.add("tooltipped")}}}function makeInvisible(e){var t=document.querySelectorAll(e);if(null!=t)for(var s=0;s<t.length;s++)t[s].style.display="none"}function render(e){var t;if(null!=e.SUGGESTION&&(null!=(t=document.querySelector("[data-id=SUGGESTION_TEXT]"))&&(t.innerHTML=e.SUGGESTION.TEXT),null!=(t=document.getElementById("suggestion"))&&t.classList.add(e.SUGGESTION.STYLE)),null!=(t=document.querySelectorAll("[data-id=INFO] span")))for(var s=0;s<t.length;s++)null==e.INFO[t[s].dataset.id]&&makeInvisible("div[data-id="+t[s].dataset.id+"_STR]");for(var r in e){var o=e[r];if(null==o)makeInvisible("MEMORY"==r?"[data-id=MEMORY]":"[data-id^="+r+"]");else if("TOP_5"==r){var n=document.querySelector("[data-id=TOP_5]");for(var l in o){var i=document.createElement("tr");i.innerHTML="<td><span class='progress' data-progress='"+o[l]+"'></span>"+l+"</td><td>"+o[l]+"</td>",n.appendChild(i)}drawProgress()}else for(var a in o)if(null!=(t=document.querySelectorAll("[data-id="+a+"]")))for(s=0;s<t.length;s++)t[s].innerHTML=o[a]}drawThresholdBars(e);for(var d=["DRAM_BANDWIDTH","MCDRAM_BANDWIDTH"],u=0;u<d.length;u++)if(null!=e[d[u]])for(t=document.querySelectorAll("[data-id="+d[u]+"_VAL]"),s=0;s<t.length;s++)t[s].classList.remove("tooltipped"),t[s].dataset.tipId=d[u]+"_ISSUE";else if(makeInvisible("[data-id="+d[u]+"_EXTRA]"),"DRAM_BANDWIDTH"==d[u]||null!=e.INFO.HW_PLATFORM&&-1<e.INFO.HW_PLATFORM.indexOf("Knight"))for(t=document.querySelectorAll("[data-id="+d[u]+"_TITLE],[data-id="+d[u]+"_VAL]"),s=0;s<t.length;s++)t[s].style.display="";if(null==e.BANDWIDTH_AVG_OUT&&null==e.PACKRATE_AVG_OUT?makeInvisible("[data-id=FABRIC_USAGE]"):null==e.BANDWIDTH_AVG_OUT?makeInvisible("[data-id=BANDWIDTH_AVG_TITLE]"):null==e.PACKRATE_AVG_OUT&&makeInvisible("[data-id=PACKRATE_AVG_TITLE]"),null!=(t=document.querySelectorAll(".tip > div[data-id$=_ISSUE]")))for(s=0;s<t.length;s++)if(""==t[s].innerHTML||null==t[s].innerHTML){var c=document.querySelectorAll("[data-tip-id="+t[s].dataset.id+"].tooltipped");if(null!=c)for(var p=0;p<c.length;p++)c[p].classList.remove("tooltipped")}else{var h=document.querySelectorAll("[data-tip-id="+t[s].dataset.id+"]:not(.tooltipped)");if(null!=h)for(p=0;p<h.length;p++)h[p].classList.add("icon"),h[p].classList.add("flag"),h[p].classList.add("tooltipped")}if(null!=(t=document.querySelectorAll(".tip > div[data-id$=_DESCR]")))for(s=0;s<t.length;s++)if("MEM_DESCR"!=t[s].dataset.id&&(""==t[s].innerHTML||null==t[s].innerHTML)){var _=t[s].dataset.id.indexOf("_DESCR");makeInvisible("[data-id^="+t[s].dataset.id.substr(0,_)+"]")}null==e.MPI_TIME?makeInvisible("[data-id$=_INFO],[data-id=NUM_RANKS_STR],[data-id=RANK_STR],[data-id=MEMORY_DATA_BLOCK]>table,[data-id=FABRIC_USAGE]"):makeInvisible("[data-id=MEMORY_APP],[data-id=RSS_APP]"),null!=e.PCONTROL&&1==e.PCONTROL?makeInvisible("[data-id$=_DATA_BLOCK],section[data-id=IO]"):makeInvisible("[data-id$=_TEXT_BLOCK]"),null!=e.SP_FLOPS&&0==parseFloat(e.SP_FLOPS.SP_FLOPS_PERC)&&makeInvisible("[data-id=SP_FLOPS_BITS]"),null!=e.DP_FLOPS&&0==parseFloat(e.DP_FLOPS.DP_FLOPS_PERC)&&makeInvisible("[data-id=DP_FLOPS_BITS]"),initTooltips()}function isBrowserSupported(){var e,t,s={"Trident/":{version:11,offset:3},"Edge/":{version:12,offset:5},Firefox:{version:17,offset:8},Chrome:{version:40,offset:7},Safari:{version:8,offset:7}},r=(navigator.appVersion,navigator.userAgent),o=(navigator.appName,""+parseFloat(navigator.appVersion)),n=parseInt(navigator.appVersion,10);for(var l in s)if(-1!=(e=r.indexOf(l))&&("Trident/"==l&&(e=r.indexOf("rv:")),-1!=(t=(o=r.substring(e+s[l].offset)).indexOf(";"))&&(o=o.substring(0,t)),-1!=(t=o.indexOf(" "))&&(o=o.substring(0,t)),-1!=(t=o.indexOf(")"))&&(o=o.substring(0,t)),n=parseInt(""+o,10),isNaN(n)&&(o=""+parseFloat(navigator.appVersion),n=parseInt(navigator.appVersion,10)),n>=s[l].version))return!0;return!1}function renderForNotSupportedBrowsers(){var e=document.querySelectorAll("body > div:not(.not-supported-browser)");for(var t in e)e[t].parentNode&&e[t].parentNode.removeChild(e[t])}function renderForSupportedBrowsers(){var e=document.querySelector("body > div.not-supported-browser");e.parentNode&&e.parentNode.removeChild(e)}function masonryLayout(e){var t=e,a={columns:4,gap:15,padding:10,breakAt:{1100:3,850:2,625:1}};function s(){var e,n=function(){var e=document.body.clientWidth,t=a.columns;for(var s in a.breakAt)if(e<s){t=a.breakAt[s];break}return t}();if(1===n)t.style.height="auto",r(t.children,function(e,t){t.removeAttribute("style"),t.style.padding=a.padding+"px",t.style.marginBottom=a.gap+"px"});else{var l="calc("+100/(e=n)+"% - "+(2*a.padding+(e-1)*a.gap/e)+"px)",i=function(e,t){for(var s=[],r=0;r<e;r++)s.push(0);return s}(n);r(t.children,function(e,t){var s,r,o;t.style.position="absolute",t.style.width=l,t.style.left=0===(o=e%n)?"0":"calc("+100/n*o+"% + "+a.gap*o/n+"px)",t.style.top=i[e%n]+"px",t.style.padding=a.padding+"px",i[e%n]+=(s=t,r=parseInt(window.getComputedStyle(s,null).getPropertyValue("height"),10),isNaN(r)?0:r+a.gap+2*a.padding)}),t.style.height=Math.max.apply(null,i)+"px"}}function r(e,t){for(var s=0,r=0,o=e.length;s<o;s++)"none"!==window.getComputedStyle(e[s],null).getPropertyValue("display")&&(t(r,e[s]),r++)}window.addEventListener("resize",s),t.style.position="relative",s()}var sendAnalyticsVersion="APS%202019%20G",sendAnalyticsReportPage="%2F";function setupAnalytics(e){sendAnalyticsReportPage=e}function sendAnalytics(e){}function feedbackInit(){document.querySelectorAll(".feedback-panel").forEach(function(e){e.classList.remove("closed"),e.querySelector(".question").innerHTML="Like this report?"})}function feedback(e){var t=document.querySelectorAll(".feedback-panel");NodeList.prototype.forEach=Array.prototype.forEach,t.forEach(function(e){e.querySelector(".question").innerHTML="Thank you",e.classList.add("closed")}),sendAnalytics(e)}function menu(){var e,t=document.body,s=document.getElementById("topMenuClosedState"),r=document.getElementById("leftMenuClosedState");function o(){t.classList.add("menuOpened"),feedbackInit()}s.onmouseenter=r.onmouseenter=function(){e=setTimeout(o,800)},s.onmouseleave=r.onmouseleave=function(){clearTimeout(e)},s.onclick=r.onclick=o,document.getElementById("content").onmouseenter=function(){t.classList.remove("menuOpened")}}function suggestionShow(){document.body.classList.add("withSuggestions");var e=document.querySelector("a.button.suggestion");e.classList.remove("arrow-s-w"),e.classList.add("cross"),e.onclick=suggestionHide,initTooltips(),sendAnalytics("suggestionShow")}function suggestionHide(){document.body.classList.remove("withSuggestions");var e=document.querySelector("a.button.suggestion");e.classList.remove("cross"),e.classList.add("arrow-s-w"),e.onclick=suggestionShow,removeTooltipFromFlags(),sendAnalytics("suggestionHide")}function initTooltips(){for(var e=document.querySelectorAll(".tooltipped[data-tip-id]"),t=0;t<e.length;t++){var o,s,r,n=e[t];n.onmouseenter=function(){clearTimeout(o),void 0!==r&&i(r),r=this,s=setTimeout(l,200,this)},n.onmouseleave=function(){clearTimeout(s),o=setTimeout(i,800,this)}}function l(e){if(void 0!==e){var t=document.querySelector(".tip > div[data-id="+e.dataset.tipId+"]");if(null!=t){t.classList.add("visible");var s=e.getBoundingClientRect(),r=t.getBoundingClientRect();0<window.innerHeight-s.top-r.height?(t.classList.add("south"),t.style.top=s.bottom+8+"px"):(t.classList.add("north"),t.style.top=s.top-r.height-8+"px"),0<window.innerWidth-s.left-r.width?(t.classList.add("west"),t.style.left=s.left+"px"):(t.classList.add("east"),t.style.right=window.innerWidth-s.right+"px"),t.onmouseenter=function(){clearTimeout(o)},t.onmouseleave=function(){i(e)}}}}function i(e){if(void 0!==e){var t=document.querySelector(".tip > div[data-id="+e.dataset.tipId+"]");null!=t&&(t.removeAttribute("style"),t.classList.remove("visible","north","south","east","west"))}}}function removeTooltipFromFlags(){for(var e=document.querySelectorAll("[data-tip-id].icon.flag"),t=0;t<e.length;t++)e[t].onmouseenter=e[t].onmouseleave=null}function getDataFromNexus(){for(var e="",t="",s=document.location.search.slice(1).split("&"),r=0;r<s.length;r++){var o=s[r].split("=");"nexus"===o[0]&&o[1]&&(e="http://"+o[1]),"path"===o[0]&&(t=o[1])}var n=e+"/repository"+t+"/data/obj.json",l=new XMLHttpRequest;l.open("GET",n,!0),l.onload=function(e){4===l.readyState&&200==l.status&&(json=JSON.parse(l.responseText),setUpAndRender())},l.send(null)}function setUpAndRender(){isBrowserSupported()?(renderForSupportedBrowsers(),sendAnalytics("pageOpen"),menu(),render(json),masonryLayout(document.querySelector("#bricks"))):renderForNotSupportedBrowsers()}window.onload=function(){NodeList.prototype.forEach=Array.prototype.forEach,setUpAndRender()};</script></head><body class="withSuggestions"><div id="leftMenuOpenedState" class="anim"><h4>Additional Performance Analysis Tools:</h4><a href="https://software.intel.com/en-us/intel-trace-analyzer">Intel® Trace Analyzer and Collector</a><div>MPI Analyzer and Profiler</div><a href="https://software.intel.com/en-us/intel-vtune-amplifier-xe">Intel® VTune™ Amplifier</a><div>Performance Profiler</div><a href="https://software.intel.com/en-us/intel-advisor-xe">Intel® Advisor</a><div>Vectorization Optimization & Thread Prototyping</div><a href="http://www.intel.com/storage-snapshot">Storage Performance Snapshot</a><div>Visualize System Storage Bottlenecks</div><section class="feedback-panel left"><div class="question"></div><a class="button like" href="#" onclick="feedback('apsLike');">Yes</a> <a class="button dislike" href="#" onclick="feedback('apsDislike');">No</a> <a class="button icon email" href="mailto:parallel.studio.support@intel.com?Subject=MPI%20Performance%20Snapshot:%20feedback" target="_top"></a></section></div><div id="leftMenuClosedState" class="anim"><div class="icon menu"></div><div class="icon intel"></div></div><div id="topMenuOpenedState" class="anim"><h4>Additional Performance Analysis Tools:</h4><a href="https://software.intel.com/en-us/intel-trace-analyzer">Intel® Trace Analyzer and Collector</a> <span>- MPI Analyzer and Profiler</span><br><a href="https://software.intel.com/en-us/intel-vtune-amplifier-xe">Intel® VTune™ Amplifier</a> <span>- Performance Profiler</span><br><a href="https://software.intel.com/en-us/intel-advisor-xe">Intel® Advisor</a> <span>- Vectorization Optimization & Thread Prototyping</span><br><a href="http://www.intel.com/storage-snapshot">Storage Performance Snapshot</a> <span>- Visualize System Storage Bottlenecks</span><section class="feedback-panel top"><div class="question"></div><a class="button like" href="#" onclick="feedback('apsLike');">Yes</a> <a class="button dislike" href="#" onclick="feedback('apsDislike');">No</a> <a class="button icon email" href="mailto:parallel.studio.support@intel.com?Subject=MPI%20Performance%20Snapshot:%20feedback" target="_top"></a></section></div><div id="topMenuClosedState" class="anim"><span class="icon intel"></span> <span class="icon menu"></span> <sup>Intel® VTune™ Amplifier</sup> Application Performance Snapshot</div><div id="contentWrap" class="anim"><div id="content"><h1><sup>Intel® VTune™ Amplifier</sup>Application Performance Snapshot</h1><article id="suggestion"><a class="button suggestion icon cross" href="#" onclick="suggestionHide();"></a><section data-id="SUGGESTION_TEXT"></section><section class="aspect_map"><section class="aspect"><span></span> <span class="aspect_value"><span class="tooltipped" data-tip-id="current_run">Current run</span></span> <span class="aspect_target"><span class="tooltipped" data-tip-id="target">Target</span></span> <span><span class="tooltipped" data-tip-id="delta">Delta</span></span></section><section class="aspect" data-id="MPI_TIME"><span class="tooltipped" data-id="MPI_TIME_TITLE" data-tip-id="MPI_TIME_DESCR">MPI Time</span> <span class="aspect_value" data-id="MPI_TIME_PERC" data-tip-id="MPI_TIME_ISSUE"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="MPI_TIME_BAR"></span></span></section><section class="aspect" data-id="OMP_SERIAL_TIME"><span class="tooltipped" data-id="OMP_SERIAL_TIME_TITLE" data-tip-id="OMP_SERIAL_TIME_DESCR">Serial Time</span> <span class="aspect_value" data-id="OMP_SERIAL_TIME_PERC" data-tip-id="OMP_SERIAL_TIME_ISSUE"></span> <span class="aspect_target"><15%</span> <span class="aspect_bar"><span data-id="OMP_SERIAL_TIME_BAR"></span></span></section><section class="aspect" data-id="OMP_IMBALANCE_TIME"><span class="tooltipped" data-id="OMP_IMBALANCE_TIME_TITLE" data-tip-id="OMP_IMBALANCE_TIME_DESCR">OpenMP Imbalance</span> <span class="aspect_value" data-id="OMP_IMBALANCE_TIME_PERC" data-tip-id="OMP_IMBALANCE_TIME_ISSUE"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="OMP_IMBALANCE_TIME_BAR"></span></span></section><section class="aspect" data-id="CPU_USAGE"><span class="tooltipped" data-id="CPU_USAGE_TITLE" data-tip-id="CPU_USAGE_DESCR">CPU Utilization</span> <span class="aspect_value" data-id="CPU_USAGE_PERC" data-tip-id="CPU_USAGE_ISSUE"></span> <span class="aspect_target">>90%</span> <span class="aspect_bar"><span data-id="CPU_USAGE_BAR"></span></span></section><section class="aspect" data-id="PHYSICAL_CORE_USAGE"><span class="tooltipped" data-id="PHYSICAL_CORE_USAGE_TITLE" data-tip-id="PHYSICAL_CORE_USAGE_DESCR">Physical Core Utilization</span> <span class="aspect_value" data-id="PHYSICAL_CORE_USAGE_PERC" data-tip-id="PHYSICAL_CORE_USAGE_ISSUE"></span> <span class="aspect_target">>80%</span> <span class="aspect_bar"><span data-id="PHYSICAL_CORE_USAGE_BAR"></span></span></section><section class="aspect" data-id="MEMORY_BOUND"><span class="tooltipped" data-id="MEMORY_BOUND_TITLE" data-tip-id="MEMORY_BOUND_DESCR">Memory Stalls</span> <span class="aspect_value" data-id="MEMORY_BOUND_PERC" data-tip-id="MEMORY_BOUND_ISSUE"></span> <span class="aspect_target"><20%</span> <span class="aspect_bar"><span data-id="MEMORY_BOUND_BAR"></span></span></section><section class="aspect" data-id="BACK_END_BOUND"><span class="tooltipped" data-id="BACK_END_BOUND_TITLE" data-tip-id="BACK_END_BOUND_DESCR">Back-End Stalls</span> <span class="aspect_value" data-id="BACK_END_BOUND_PERC" data-tip-id="BACK_END_BOUND_ISSUE"></span> <span class="aspect_target"><20%</span> <span class="aspect_bar"><span data-id="BACK_END_BOUND_BAR"></span></span></section><section class="aspect" data-id="VECTORIZATION"><span class="tooltipped" data-id="VECTORIZATION_TITLE" data-tip-id="VECTORIZATION_DESCR">Vectorization</span> <span class="aspect_value" data-id="VECTORIZATION_PERC" data-tip-id="VECTORIZATION_ISSUE"></span> <span class="aspect_target">>70%</span> <span class="aspect_bar"><span data-id="VECTORIZATION_BAR"></span></span></section><section class="aspect" data-id="SIMD_PER_CYCLE"><span class="tooltipped" data-id="SIMD_PER_CYCLE_TITLE" data-tip-id="SIMD_PER_CYCLE_DESCR">SIMD Instr. per Cycle</span> <span class="aspect_value" data-id="SIMD_PER_CYCLE_VAL" data-tip-id="SIMD_PER_CYCLE_ISSUE"></span> <span class="aspect_target">>1</span> <span class="aspect_bar"><span data-id="SIMD_PER_CYCLE_BAR"></span></span></section><section class="aspect" data-id="IO"><span class="tooltipped" data-id="IO_TITLE" data-tip-id="IOWAIT_USEC_DESCR" class="tooltipped">I/O Bound</span> <span class="aspect_value" data-id="IOWAIT_USEC_PERC"></span> <span class="aspect_target"><10%</span> <span class="aspect_bar"><span data-id="IO_BAR"></span></span></section></section></article><article data-id="INFO"><div data-id="APPLICATION_NAME_STR">Application: <span data-id="APPLICATION_NAME"></span></div><div data-id="CREATION_DATETIME_STR">Report creation date: <span data-id="CREATION_DATETIME"></span></div><div data-id="RANK_STR">Rank: <span data-id="RANK"></span></div><div data-id="NUM_RANKS_STR">Number of ranks: <span data-id="NUM_RANKS"></span></div><div data-id="RANKS_PER_NODE_STR">Ranks per node: <span data-id="RANKS_PER_NODE"></span></div><div data-id="NUM_OMP_THREADS_STR">OpenMP threads<span data-id="POSTFIX"></span>: <span data-id="NUM_OMP_THREADS"></span></div><div data-id="HW_PLATFORM_STR">HW Platform: <span data-id="HW_PLATFORM"></span></div><div data-id="FREQUENCY_STR">Frequency: <span data-id="FREQUENCY"></span></div><div data-id="NUM_LOGIC_CORE_STR">Logical Core Count per node: <span data-id="NUM_LOGIC_CORE"></span></div><div data-id="COLLECTOR_TYPE_STR">Collector type: <span data-id="COLLECTOR_TYPE"></span></div></article><article data-id="WALLCLOCK" class="top-metric"><span data-id="WALLCLOCK_VAL"></span> <span class="tooltipped" data-tip-id="WALLCLOCK_DESCR">Elapsed Time</span></article><article data-id="CPI_RATE" class="top-metric"><span data-id="CPI_RATE_VAL" data-tip-id="CPI_RATE_ISSUE"></span> <span data-tip-id="CPI_RATE_DESCR" class="tooltipped">CPI</span><br><span data-id="CPI_INFO">(<span class="tooltipped" data-tip-id="CPI_RATE_MAX_NODE">MAX</span> <span data-id="CPI_RATE_MAX_VAL"></span>, <span class="tooltipped" data-tip-id="CPI_RATE_MIN_NODE">MIN</span> <span data-id="CPI_RATE_MIN_VAL"></span>)</span></article><article data-id="SPGFLOPS" class="aux-metric"><span data-id="SPGFLOPS_TOTAL_VAL"></span> <span data-tip-id="SPGFLOPS_DESCR" class="tooltipped">Single Precision</span></article><article data-id="DPGFLOPS" class="aux-metric"><span data-id="DPGFLOPS_TOTAL_VAL"></span> <span data-tip-id="DPGFLOPS_DESCR" class="tooltipped">Double Precision</span></article><article data-id="X87GFLOPS" class="aux-metric"><span data-id="X87GFLOPS_TOTAL_VAL"></span> <span data-tip-id="X87GFLOPS_DESCR" class="tooltipped">x87</span></article><article data-id="AVG_FREQUENCY" class="aux-metric"><span data-id="AVG_FREQUENCY_VAL"></span> <span data-tip-id="AVG_FREQUENCY_DESCR" class="tooltipped">Average CPU Frequency</span></article><div id="bricks"><article data-id="MPI_TIME"><h3 data-id="MPI_TIME_TITLE" data-tip-id="MPI_TIME_DESCR" class="tooltipped">MPI Time</h3><span data-id="MPI_TIME_VAL"></span><br><span data-id="MPI_TIME_PERC" data-tip-id="MPI_TIME_ISSUE"></span> of Elapsed Time<br><div class="indent"><h4 data-id="MPI_IMBALANCE_TITLE" data-tip-id="MPI_IMBALANCE_DESCR" class="tooltipped">MPI Imbalance</h4><span data-id="MPI_IMBALANCE_VAL"></span><br><span data-id="MPI_IMBALANCE_PERC" data-tip-id="MPI_IMBALANCE_ISSUE"></span> of Elapsed Time<br></div><table data-id="TOP_5"><tr><th>TOP 5 MPI Functions</th><th><span class="tooltipped" data-tip-id="MPI_PERC_DESCR">%</span></th></tr></table><div data-id="FABRIC_USAGE"><h4>Intel Omni-Path Fabric Usage</h4><table data-id="BANDWIDTH_AVG_TITLE"><tr><th>Interconnect Bandwidth</th><th>AVG, <span data-id="BANDWIDTH_AVG_IN_UNIT"></span></th></tr><tr><td><span data-tip-id="BANDWIDTH_AVG_OUT_DESCR" class="tooltipped">Outgoing:</span></td><td class="cell"><span data-id="BANDWIDTH_AVG_OUT_VAL" data-tip-id="BANDWIDTH_AVG_OUT_ISSUE"></span></td></tr><tr><td><span data-tip-id="BANDWIDTH_AVG_IN_DESCR" class="tooltipped">Incoming:</span></td><td class="cell"><span data-id="BANDWIDTH_AVG_IN_VAL" data-tip-id="BANDWIDTH_AVG_IN_ISSUE"></span></td></tr></table><table data-id="PACKRATE_AVG_TITLE"><tr><th>Interconnect Packet Rate</th><th>AVG, <span data-id="PACKRATE_AVG_OUT_UNIT"></span></th></tr><tr><td><span data-tip-id="PACKRATE_AVG_OUT_DESCR" class="tooltipped">Outgoing:</span></td><td class="cell"><span data-id="PACKRATE_AVG_OUT_VAL" data-tip-id="PACKRATE_AVG_OUT_ISSUE"></span></td></tr><tr><td><span data-tip-id="PACKRATE_AVG_IN_DESCR" class="tooltipped">Incoming:</span></td><td class="cell"><span data-id="PACKRATE_AVG_IN_VAL" data-tip-id="PACKRATE_AVG_IN_ISSUE"></span></td></tr></table></div></article><article data-id="OMP_SERIAL_TIME"><h3 data-id="OMP_SERIAL_TIME_TITLE" data-tip-id="OMP_SERIAL_TIME_DESCR" class="tooltipped">Serial Time</h3><span data-id="OMP_SERIAL_TIME_VAL"></span><br><span data-id="OMP_SERIAL_TIME_PERC" data-tip-id="OMP_SERIAL_TIME_ISSUE"></span> of Elapsed Time<br></article><article data-id="OMP_IMBALANCE_TIME"><h3 data-id="OMP_IMBALANCE_TIME_TITLE" data-tip-id="OMP_IMBALANCE_TIME_DESCR" class="tooltipped">OpenMP Imbalance</h3><span data-id="OMP_IMBALANCE_TIME_VAL"></span><br><span data-id="OMP_IMBALANCE_TIME_PERC" data-tip-id="OMP_IMBALANCE_TIME_ISSUE"></span> of Elapsed Time<br></article><article data-id="CPU_USAGE"><h3 data-id="CPU_USAGE_TITLE" data-tip-id="CPU_USAGE_DESCR" class="tooltipped">CPU Utilization</h3><span data-id="CPU_USAGE_PERC" data-tip-id="CPU_USAGE_ISSUE"></span><div class="indent"><h4 data-id="AVERAGE_CPU_USE_DESCR" class="tooltipped">Average CPU Utilization</h4><span data-id="AVERAGE_CPU_USE_VAL"></span> Out of <span data-id="CPU_OUT_OF_VAL"></span> logical CPUs</div></article><article data-id="PHYSICAL_CORE_USAGE"><h3 data-id="PHYSICAL_CORE_USAGE_TITLE" data-tip-id="PHYSICAL_CORE_USAGE_DESCR" class="tooltipped">Physical Core Utilization</h3><span data-id="PHYSICAL_CORE_USAGE_PERC" data-tip-id="PHYSICAL_CORE_USAGE_ISSUE"></span><div class="indent"><h4 data-id="PH_CORE_USE_DESCR" class="tooltipped">Average Physical Core Utilization</h4><span data-id="PH_CORE_USE_VAL"></span> out of <span data-id="PH_CORE_OUT_OF_VAL"></span> physical cores</div></article><article data-id="MEMORY_BOUND"><h3 data-id="MEMORY_BOUND_TITLE" data-tip-id="MEMORY_BOUND_DESCR" class="tooltipped">Memory Stalls</h3><span data-id="MEMORY_BOUND_PERC" data-tip-id="MEMORY_BOUND_ISSUE"></span> of pipeline slots<div class="indent"><h4 data-id="CACHE_BOUND_TITLE" data-tip-id="CACHE_BOUND_DESCR" class="tooltipped">Cache Stalls</h4><span data-id="CACHE_BOUND_PERC" data-tip-id="CACHE_BOUND_ISSUE"></span><span data-id="CACHE_BOUND_EXTRA"> of cycles</span><h4 data-id="DRAM_BOUND_TITLE" data-tip-id="DRAM_BOUND_DESCR" class="tooltipped">DRAM Stalls</h4><span data-id="DRAM_BOUND_PERC" data-tip-id="DRAM_BOUND_ISSUE"></span> of cycles<h4 data-id="DRAM_BANDWIDTH_TITLE" data-tip-id="DRAM_BANDWIDTH_DESCR" class="tooltipped">DRAM Bandwidth</h4><span data-id="DRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="DRAM_BANDWIDTH_VAL" data-tip-id="DRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="DRAM_BANDWIDTH_UNIT"></span><h4 data-id="PERS_MEM_BOUND_TITLE" data-tip-id="PERS_MEM_BOUND_DESCR" class="tooltipped">Persistent Memory Stalls</h4><span data-id="PERS_MEM_BOUND_PERC" data-tip-id="PERS_MEM_BOUND_ISSUE"></span><h4 data-id="PERS_MEM_BANDWIDTH_TITLE" data-tip-id="PERS_MEM_BANDWIDTH_DESCR" class="tooltipped">Persistent Memory Bandwidth</h4><span data-id="PERS_MEM_BANDWIDTH_EXTRA">AVG </span><span data-id="PERS_MEM_BANDWIDTH_VAL" data-tip-id="PERS_MEM_BANDWIDTH_ISSUE"></span><span data-id="PERS_MEM_BANDWIDTH_UNIT"></span><h4 data-id="DRAM_HIT_RATIO_TITLE">DRAM Cache Hit Ratio</h4><span data-id="DRAM_HIT_RATIO_PERC"></span><div class="indent"><h4 data-id="DRAM_CACHE_HIT_TITLE">DRAM Cache Hits</h4><span data-id="DRAM_CACHE_HIT_VAL"></span><h4 data-id="DRAM_CACHE_MISS_TITLE">DRAM Cache Misses</h4><span data-id="DRAM_CACHE_MISS_VAL"></span></div><h4 data-id="MCDRAM_BANDWIDTH_TITLE" data-tip-id="MCDRAM_BANDWIDTH_DESCR" class="tooltipped">MCDRAM Bandwidth</h4><span data-id="MCDRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="MCDRAM_BANDWIDTH_VAL" data-tip-id="MCDRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="MCDRAM_BANDWIDTH_UNIT"></span><h4 data-id="NUMA_USE_TITLE" data-tip-id="NUMA_USE_DESCR" class="tooltipped">NUMA</h4><span data-id="NUMA_USE_PERC" data-tip-id="NUMA_USE_ISSUE"></span><span data-id="NUMA_USE_EXTRA"> of remote accesses</span></div></article><article data-id="BACK_END_BOUND"><h3 data-id="BACK_END_BOUND_TITLE" data-tip-id="BACK_END_BOUND_DESCR" class="tooltipped">Back-End Stalls</h3><span data-id="BACK_END_BOUND_PERC" data-tip-id="BACK_END_BOUND_ISSUE"></span> of pipeline slots<div class="indent"><h4 data-tip-id="L2_HIT_BOUND_DESCR" class="tooltipped">L2 Hit Bound</h4><span data-id="L2_HIT_BOUND_PERC" data-tip-id="L2_HIT_BOUND_ISSUE"></span> of cycles<h4 data-tip-id="L2_MISS_BOUND_DESCR" class="tooltipped">L2 Miss Bound</h4><span data-id="L2_MISS_BOUND_PERC" data-tip-id="L2_MISS_BOUND_ISSUE"></span> of cycles<h4 data-id="DRAM_BANDWIDTH_TITLE" data-tip-id="DRAM_BANDWIDTH_DESCR" class="tooltipped">DRAM Bandwidth</h4><span data-id="DRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="DRAM_BANDWIDTH_VAL" data-tip-id="DRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="DRAM_BANDWIDTH_UNIT"></span><h4 data-id="MCDRAM_BANDWIDTH_TITLE" data-tip-id="MCDRAM_BANDWIDTH_DESCR" class="tooltipped">MCDRAM Bandwidth</h4><span data-id="MCDRAM_BANDWIDTH_EXTRA">AVG </span><span data-id="MCDRAM_BANDWIDTH_VAL" data-tip-id="MCDRAM_BANDWIDTH_VAL_DESCR" class="tooltipped">Not Available</span><span data-id="MCDRAM_BANDWIDTH_UNIT"></span></div></article><article data-id="VECTORIZATION"><h3 data-id="VECTORIZATION_TITLE" data-tip-id="VECTORIZATION_DESCR" class="tooltipped">Vectorization</h3><span data-id="VECTORIZATION_PERC" data-tip-id="VECTORIZATION_ISSUE"></span><span data-id="VECTORIZATION_TXT">of Packed FP Operations</span><div class="indent"><h4 data-id="VECTORIZATION_DATA">Instruction Mix:</h4><h4 data-id="SP_FLOPS" data-tip-id="SP_FLOPS_DESCR" class="tooltipped">SP FLOPs</h4><span data-id="SP_FLOPS_DATA"><span data-id="SP_FLOPS_PERC" data-tip-id="SP_FLOPS_ISSUE"></span><span> of uOps</span></span><br><span data-id="SP_PACKED"><span data-tip-id="SP_PACKED_DESCR" class="tooltipped">Packed</span>: <span data-id="SP_PACKED_PERC"></span><span> from SP FP</span></span><div data-id="SP_FLOPS_BITS" class="indent"><span data-id="SP_128_BIT"><span data-tip-id="SP_128_BIT_DESCR" class="tooltipped">128-bit</span>: <span data-id="SP_128_BIT_PERC" data-tip-id="SP_128_BIT_ISSUE"></span></span><br><span data-id="SP_256_BIT"><span data-tip-id="SP_256_BIT_DESCR" class="tooltipped">256-bit</span>: <span data-id="SP_256_BIT_PERC" data-tip-id="SP_256_BIT_ISSUE"></span></span><br><span data-id="SP_512_BIT"><span data-tip-id="SP_512_BIT_DESCR" class="tooltipped">512-bit</span>: <span data-id="SP_512_BIT_PERC"></span></span></div><span data-id="SP_SCALAR"><span data-tip-id="SP_SCALAR_DESCR" class="tooltipped">Scalar</span>: <span data-id="SP_SCALAR_PERC" data-tip-id="SP_SCALAR_ISSUE"></span><span> from SP FP</span></span><h4 data-id="DP_FLOPS" data-tip-id="DP_FLOPS_DESCR" class="tooltipped">DP FLOPs</h4><span data-id="DP_FLOPS_DATA"><span data-id="DP_FLOPS_PERC" data-tip-id="DP_FLOPS_ISSUE"></span><span> of uOps</span></span><br><span data-id="DP_PACKED"><span data-tip-id="DP_PACKED_DESCR" class="tooltipped">Packed</span>: <span data-id="DP_PACKED_PERC"></span><span> from DP FP</span></span><div data-id="DP_FLOPS_BITS" class="indent"><span data-id="DP_128_BIT"><span data-tip-id="DP_128_BIT_DESCR" class="tooltipped">128-bit</span>: <span data-id="DP_128_BIT_PERC" data-tip-id="DP_128_BIT_ISSUE"></span></span><br><span data-id="DP_256_BIT"><span data-tip-id="DP_256_BIT_DESCR" class="tooltipped">256-bit</span>: <span data-id="DP_256_BIT_PERC" data-tip-id="DP_256_BIT_ISSUE"></span></span><br><span data-id="DP_512_BIT"><span data-tip-id="DP_512_BIT_DESCR" class="tooltipped">512-bit</span>: <span data-id="DP_512_BIT_PERC"></span></span></div><span data-id="DP_SCALAR"><span data-tip-id="DP_SCALAR_DESCR" class="tooltipped">Scalar</span>: <span data-id="DP_SCALAR_PERC" data-tip-id="DP_SCALAR_ISSUE"></span><span> from DP FP</span></span><h4 data-id="X87" data-tip-id="X87_DESCR" class="tooltipped">x87 FLOPs</h4><span data-id="X87_DATA"><span data-id="X87_PERC" data-tip-id="X87_ISSUE"></span><span> of uOps</span></span><h4 data-id="NON_FP" data-tip-id="NON_FP_DESCR" class="tooltipped">Non-FP</h4><span data-id="NON_FP_DATA"><span data-id="NON_FP_PERC" data-tip-id="NON_FP_ISSUE"></span><span> of uOps</span></span><h4 data-id="FP_ARITH_MEM_RD" data-tip-id="FP_ARITH_MEM_RD_DESCR" class="tooltipped">FP Arith/Mem Rd Instr. Ratio</h4><span data-id="FP_ARITH_MEM_RD_VAL" data-tip-id="FP_ARITH_MEM_RD_ISSUE"></span><h4 data-id="FP_ARITH_MEM_WR" data-tip-id="FP_ARITH_MEM_WR_DESCR" class="tooltipped">FP Arith/Mem Wr Instr. Ratio</h4><span data-id="FP_ARITH_MEM_WR_VAL" data-tip-id="FP_ARITH_MEM_WR_ISSUE"></span></div></article><article data-id="SIMD_PER_CYCLE"><h3 data-id="SIMD_PER_CYCLE_TITLE" data-tip-id="SIMD_PER_CYCLE_DESCR" class="tooltipped">SIMD Instr. per Cycle</h3><span data-id="SIMD_PER_CYCLE_VAL" data-tip-id="SIMD_PER_CYCLE_ISSUE"></span><div class="indent"><h4>FP Instruction Mix</h4><div class="indent">% of <span data-tip-id="PACKED_SIMD_DESCR" class="tooltipped">Packed SIMD Instr.</span>: <span data-id="PACKED_SIMD_PERC"></span><br>% of <span data-tip-id="SCALAR_SIMD_DESCR" class="tooltipped">Scalar SIMD Instr.</span>: <span data-id="SCALAR_SIMD_PERC" data-tip-id="SCALAR_SIMD_ISSUE"></span></div></div></article><article data-id="IO"><h3 data-id="IO_TITLE" data-tip-id="IOWAIT_USEC_DESCR" class="tooltipped">I/O Bound</h3><div data-id="IO_TEXT_BLOCK">These metrics are not available for Pcontrol.</div><div data-id="IO_DATA_BLOCK"><span data-id="IOWAIT_USEC_PERC"></span><br>(AVG <span data-id="IOWAIT_USEC_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="IOWAIT_USEC_MAX_RANK">PEAK</a> <span data-id="IOWAIT_USEC_MAX_VAL"></span>)<div class="indent"><h4 data-tip-id="READ_BYTES_DESCR" class="tooltipped">Read</h4>AVG <span data-id="READ_BYTES_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="READ_BYTES_MAX_RANK">MAX</a> <span data-id="READ_BYTES_MAX_VAL"></span><h4 data-tip-id="WRITTEN_BYTES_DESCR" class="tooltipped">Write</h4>AVG <span data-id="WRITTEN_BYTES_AVG_VAL"></span>, <a class="tooltipped" data-tip-id="WRITTEN_BYTES_MAX_RANK">MAX</a> <span data-id="WRITTEN_BYTES_MAX_VAL"></span></div></div></article><article data-id="MEMORY"><h3 data-tip-id="MEM_DESCR" class="tooltipped">Memory Footprint</h3><div data-id="MEMORY_TEXT_BLOCK">These metrics are not available for Pcontrol.</div><div data-id="MEMORY_DATA_BLOCK"><div data-id="RSS_APP">Resident total: <span data-id="RSS_VAL"></span></div><table><tr><th>Resident</th><th>PEAK</th><th>AVG</th></tr><tr><td>Per node:</td><td><span data-id="PEAK_RSS_PER_NODE_VAL" class="tooltipped" data-tip-id="PEAK_RSS_NODE"></span></td><td><span data-id="MEAN_RSS_PER_NODE_VAL"></span></td></tr><tr><td>Per rank:</td><td class="cell"><span data-id="PEAK_RSS_VAL" class="tooltipped" data-tip-id="PEAK_RSS_RANK"></span></td><td class="cell"><span data-id="MEAN_RSS_VAL"></span></td></tr></table><div data-id="MEMORY_APP">Virtual total: <span data-id="MEM_VAL"></span></div><table><tr><th>Virtual</th><th>PEAK</th><th>AVG</th></tr><tr><td>Per node:</td><td><span data-id="PEAK_MEM_PER_NODE_VAL" class="tooltipped" data-tip-id="PEAK_MEM_NODE"></span></td><td><span data-id="MEAN_MEM_PER_NODE_VAL"></span></td></tr><tr><td>Per rank:</td><td class="cell"><span data-id="PEAK_MEM_VAL" class="tooltipped" data-tip-id="PEAK_MEM_RANK"></span></td><td class="cell"><span data-id="MEAN_MEM_VAL"></span></td></tr></table></div></article></div></div></div><div class="tip"><div data-id="current_run">Metric <b>value</b> collected during the application profiling run.</div><div data-id="target">Metric threshold used to indicate possible performance issues. Threshold values are fixed and may not accurately reflect the nature of your application.</div><div data-id="delta">Visual representation of the current run value compared to the target threshold. The Delta is set to zero if the current run value is within the target threshold.</div><div data-id="WALLCLOCK_DESCR"></div><div data-id="SPGFLOPS_DESCR"></div><div data-id="DPGFLOPS_DESCR"></div><div data-id="X87GFLOPS_DESCR"></div><div data-id="AVG_FREQUENCY_DESCR"></div><div data-id="CPI_RATE_MIN_NODE"></div><div data-id="CPI_RATE_MAX_NODE"></div><div data-id="CPI_RATE_DESCR"></div><div data-id="CPI_RATE_ISSUE"></div><div data-id="MEM_DESCR"></div><div data-id="RSS_DESCR"></div><div data-id="MEMORY_BOUND_MIN_NODE"></div><div data-id="MEMORY_BOUND_MAX_NODE"></div><div data-id="READ_BYTES_MAX_RANK"></div><div data-id="WRITTEN_BYTES_MAX_RANK"></div><div data-id="IOWAIT_USEC_MAX_RANK"></div><div data-id="IOWAIT_USEC_DESCR"></div><div data-id="PEAK_MEM_RANK"></div><div data-id="PEAK_MEM_NODE"></div><div data-id="PEAK_RSS_RANK"></div><div data-id="PEAK_RSS_NODE"></div><div data-id="READ_BYTES_DESCR"></div><div data-id="WRITTEN_BYTES_DESCR"></div><div data-id="MPI_TIME_DESCR"></div><div data-id="MPI_TIME_ISSUE"></div><div data-id="MPI_IMBALANCE_DESCR"></div><div data-id="MPI_IMBALANCE_ISSUE"></div><div data-id="OMP_IMBALANCE_TIME_ISSUE"></div><div data-id="OMP_IMBALANCE_TIME_DESCR"></div><div data-id="OMP_SERIAL_TIME_ISSUE"></div><div data-id="OMP_SERIAL_TIME_DESCR"></div><div data-id="MEMORY_BOUND_DESCR"></div><div data-id="MEMORY_BOUND_ISSUE"></div><div data-id="VECTORIZATION_DESCR"></div><div data-id="VECTORIZATION_ISSUE"></div><div data-id="FLOPS_PER_CYCLE_DESCR"></div><div data-id="FLOPS_PER_CYCLE_ISSUE"></div><div data-id="X87_DESCR"></div><div data-id="X87_ISSUE"></div><div data-id="NON_FP_DESCR"></div><div data-id="NON_FP_ISSUE"></div><div data-id="DP_FLOPS_DESCR"></div><div data-id="DP_PACKED_DESCR"></div><div data-id="DP_128_BIT_DESCR"></div><div data-id="DP_128_BIT_ISSUE"></div><div data-id="DP_256_BIT_DESCR"></div><div data-id="DP_256_BIT_ISSUE"></div><div data-id="DP_512_BIT_DESCR"></div><div data-id="DP_SCALAR_DESCR"></div><div data-id="DP_SCALAR_ISSUE"></div><div data-id="SP_FLOPS_DESCR"></div><div data-id="SP_PACKED_DESCR"></div><div data-id="SP_128_BIT_DESCR"></div><div data-id="SP_128_BIT_ISSUE"></div><div data-id="SP_256_BIT_DESCR"></div><div data-id="SP_256_BIT_ISSUE"></div><div data-id="SP_512_BIT_DESCR"></div><div data-id="SP_SCALAR_DESCR"></div><div data-id="SP_SCALAR_ISSUE"></div><div data-id="FP_ARITH_MEM_RD_DESCR"></div><div data-id="FP_ARITH_MEM_RD_ISSUE"></div><div data-id="FP_ARITH_MEM_WR_DESCR"></div><div data-id="FP_ARITH_MEM_WR_ISSUE"></div><div data-id="CACHE_BOUND_DESCR"></div><div data-id="CACHE_BOUND_ISSUE"></div><div data-id="DRAM_BOUND_DESCR"></div><div data-id="DRAM_BOUND_ISSUE"></div><div data-id="PERS_MEM_BOUND_DESCR"></div><div data-id="PERS_MEM_BOUND_ISSUE"></div><div data-id="NUMA_USE_DESCR"></div><div data-id="NUMA_USE_ISSUE"></div><div data-id="CPU_USAGE_DESCR"></div><div data-id="CPU_USAGE_ISSUE"></div><div data-id="PHYSICAL_CORE_USAGE_DESCR"></div><div data-id="PHYSICAL_CORE_USAGE_ISSUE"></div><div data-id="AVERAGE_CPU_USE_DESCR"></div><div data-id="BACK_END_BOUND_DESCR"></div><div data-id="BACK_END_BOUND_ISSUE"></div><div data-id="L2_HIT_BOUND_DESCR"></div><div data-id="L2_MISS_BOUND_DESCR"></div><div data-id="L2_HIT_BOUND_ISSUE"></div><div data-id="L2_MISS_BOUND_ISSUE"></div><div data-id="SIMD_PER_CYCLE_DESCR"></div><div data-id="SIMD_PER_CYCLE_ISSUE"></div><div data-id="PACKED_SIMD_DESCR"></div><div data-id="SCALAR_SIMD_DESCR"></div><div data-id="SCALAR_SIMD_ISSUE"></div><div data-id="FLOPS_PER_CYCLE_ISSUE"></div><div data-id="DRAM_BANDWIDTH_DESCR">Average amount of data transferred through DRAM memory controller per second.</div><div data-id="MCDRAM_BANDWIDTH_DESCR">Average amount of data transferred through MCDRAM memory controller per second.</div><div data-id="DRAM_BANDWIDTH_VAL_DESCR">Data for this metric is not collected since it requires system-wide performance monitoring. Make sure the sampling driver is properly installed on your system: <a href="https://software.intel.com/en-us/vtune-amplifier-help-sep-driver">https://software.intel.com/en-us/vtune-amplifier-help-sep-driver</a>. Otherwise, enable a driverless Perf-based sampling collection by setting the /proc/sys/kernel/perf_even_paranoid value to 0 or less.</div><div data-id="MCDRAM_BANDWIDTH_VAL_DESCR">Data for this metric is not collected since it requires system-wide performance monitoring. Make sure the sampling driver is properly installed on your system: <a href="https://software.intel.com/en-us/vtune-amplifier-help-sep-driver">https://software.intel.com/en-us/vtune-amplifier-help-sep-driver</a>. Otherwise, enable a driverless Perf-based sampling collection by setting the /proc/sys/kernel/perf_even_paranoid value to 0 or less.</div><div data-id="PERS_MEM_BANDWIDTH_DESCR"></div><div data-id="MCDRAM_BANDWIDTH_ISSUE"></div><div data-id="DRAM_BANDWIDTH_ISSUE"></div><div data-id="PERS_MEM_BANDWIDTH_ISSUE"></div><div data-id="BANDWIDTH_AVG_OUT_DESCR"></div><div data-id="BANDWIDTH_AVG_IN_DESCR"></div><div data-id="BANDWIDTH_AVG_OUT_ISSUE"></div><div data-id="BANDWIDTH_AVG_IN_ISSUE"></div><div data-id="PACKRATE_AVG_OUT_DESCR"></div><div data-id="PACKRATE_AVG_IN_DESCR"></div><div data-id="PACKRATE_AVG_OUT_ISSUE"></div><div data-id="PACKRATE_AVG_IN_ISSUE"></div><div data-id="MPI_PERC_DESCR">Percentage from Elapsed Time</div></div><div class="not-supported-browser">Intel<sup>®</sup> MPI Performance Snapshot report cannot be opened with the current browser. Use any of these supported browsers:<ul><li>Mozilla Firefox* version 17 or later</li><li>Safari* version 8 or later</li><li>Microsoft Internet Explorer* version 11 or later</li><li>Microsoft Edge* version 12 or later</li><li>Google Chrome* version 40 or later</li></ul></div></body></html> \ No newline at end of file diff --git a/report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.txt b/report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.txt deleted file mode 100644 index 090ffad..0000000 --- a/report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.txt +++ /dev/null @@ -1,64 +0,0 @@ -Command: /cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples/swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml -Resources: 1 node (32 physical, 64 logical cores per node) -Memory: 503 GiB per node -Tasks: 1 process -Machine: b115.pri.cosma7.alces.network -Start time: Thu Jan 21 15:42:27 2021 -Total time: 51 seconds -Full path: /cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples - -Summary: swift_mpi is Compute-bound in this configuration -Compute: 96.8% |=========| -MPI: 0.5% || -I/O: 2.8% || -This application run was Compute-bound. A breakdown of this time and advice for investigating further is in the CPU section below. -As very little time is spent in MPI calls, this code may also benefit from running at larger scales. - -CPU: -A breakdown of the 96.8% CPU time: -Scalar numeric ops: 24.3% |=| -Vector numeric ops: <0.1% || -Memory accesses: 61.8% |=====| -The per-core performance is memory-bound. Use a profiler to identify time-consuming loops and check their cache performance. -No time is spent in vectorized instructions. Check the compiler's vectorization advice to see why key loops could not be vectorized. - -MPI: -A breakdown of the 0.5% MPI time: -Time in collective calls: 100.0% |=========| -Time in point-to-point calls: 0.0% | -Effective process collective rate: 919 kB/s -Effective process point-to-point rate: 0.00 bytes/s - -I/O: -A breakdown of the 2.8% I/O time: -Time in reads: 16.7% |=| -Time in writes: 83.3% |=======| -Effective process read rate: 143 MB/s -Effective process write rate: 53.9 MB/s -Most of the time is spent in write operations with a low effective transfer rate. This may be caused by contention for the filesystem or inefficient access patterns. Use an I/O profiler to investigate which write calls are affected. - -Threads: -A breakdown of how multiple threads were used: -Computation: 53.7% |====| -Synchronization: 46.3% |====| -Physical core utilization: 197.3% |===================| -System load: 140.2% |=============| -Significant time is spent synchronizing threads. Check which locks cause the most overhead with a profiler. -Multiple threads per physical core may be causing contention. Using fewer threads may improve performance for some codes. - -Memory: -Per-process memory usage may also affect scaling: -Mean process memory usage: 394 MiB -Peak process memory usage: 442 MiB -Peak node memory usage: 0.0% | -Node memory usage metrics: - -Energy: -A breakdown of how energy was used: -CPU: not supported -System: not supported -Mean node power: not supported -Peak node power: 0.00 W -Energy metrics are not available on this system. -CPU metrics are not supported (no intel_rapl module) - diff --git a/report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.html b/report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.html similarity index 99% rename from report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.html rename to report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.html index fdc4a04..68e5ea0 100644 --- a/report/day1/benchmark-fast/swift_mpi_1p_1n_2021-01-21_15-42.html +++ b/report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.html @@ -730,9 +730,9 @@ Cs.call=ma.call,Cs.empty=ma.empty,Cs.node=ma.node,Cs.size=ma.size,$o.transition= }, "data": { "applicationDetails": { - "commandLine": "/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples/swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml", + "commandLine": "/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim_debug/examples/swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml", "exeName": "swift_mpi", - "exePath": "/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim/examples", + "exePath": "/cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim_debug/examples", "gpusPerNode": { "plain": { "precision": 0, @@ -751,37 +751,37 @@ Cs.call=ma.call,Cs.empty=ma.empty,Cs.node=ma.node,Cs.size=ma.size,$o.transition= "hostMemory": { "plain": { "precision": 0, - "value": 540515954688 + "value": 540515952640 } }, - "hostname": "b115.pri.cosma7.alces.network", + "hostname": "b107.pri.cosma7.alces.network", "inputFile": "", "isOpenMp": false, "logicalCoresPerNode": 64, "nodes": { - "plain": 1 + "plain": 2 }, "notes": "", "ompNumThreads": "0", "physicalCoresPerNode": 32, "processes": { - "plain": 1 + "plain": 2 }, "profiledRanks": "", - "startDate": "Thu Jan 21 15:42:27 2021", + "startDate": "Thu Jan 21 16:09:31 2021", "time": { - "plain": 51 + "plain": 167 } }, "cpu": { "advice1": "The per-core performance is <span class=\"cpu.memPercent\">memory-bound</span>. Use a profiler to identify time-consuming loops and check their cache performance.", "advice2": "No time is spent in <span class=\"cpu.vectorNumPercent\">vectorized instructions</span>. Check the compiler's vectorization advice to see why key loops could not be vectorized.", - "memPercent": 61.81853506264091, + "memPercent": 97.50226666666389, "openMpPercent": 0, - "scalarNumPercent": 24.34741734942139, + "scalarNumPercent": 2.4901118977976235, "showWaitingAcc": false, "singleCorePercent": 100, - "vectorNumPercent": 0.04209156941917592, + "vectorNumPercent": 0.007621435538478816, "waitingAccPercent": 0 }, "cuda": { @@ -807,29 +807,29 @@ Cs.call=ma.call,Cs.empty=ma.empty,Cs.node=ma.node,Cs.size=ma.size,$o.transition= } }, "io": { - "advice1": "Most of the time is spent in <span class=\"io.writePercent\">write operations</span> with <span class=\"io.writeRate\">a low</span> effective transfer rate. This may be caused by contention for the filesystem or inefficient access patterns. Use an I/O profiler to investigate which write calls are affected.", + "advice1": "Most of the time is spent in <span class=\"io.writePercent\">write operations</span> with <span class=\"io.writeRate\">a very high</span> effective transfer rate. This suggests asynchronous file operations are successfully reducing application time spent waiting. To improve further, try reducing the amount of data transfer required.", "advice2": "", "rateError": "", - "readPercent": 16.666666666666664, - "readRate": 142614217.66372672, - "writePercent": 83.33333333333333, - "writeRate": 53864243.349163905 + "readPercent": 37.5, + "readRate": 33301428491.31674, + "writePercent": 62.5, + "writeRate": 20032362396.336964 }, "memory": { - "advice1": "", + "advice1": "The <span class=\"memory.nodePeakPercent\">peak node memory usage</span> is very low. Running with fewer MPI processes and more data on each process may be more efficient.", "advice2": "", - "error": "Node memory usage metrics: ", - "mean": 413324667.2592593, - "nodePeakPercent": 0, - "peak": 463282176 + "error": "", + "mean": 365776340.8192771, + "nodePeakPercent": 1, + "peak": 502489088 }, "mpi": { - "advice1": "", + "advice1": "Most of the time is spent in <span class=\"mpi.collectivePercent\">collective calls</span> with <span class=\"mpi.collectiveRate\">a very low</span> transfer rate. This suggests load imbalance is causing synchronization overhead; use an MPI profiler to investigate.", "advice2": "", - "collectivePercent": 100, - "collectiveRate": 918598.6144589073, - "p2pPercent": 0, - "p2pRate": 0, + "collectivePercent": 67.50000000000001, + "collectiveRate": 9670388.960290618, + "p2pPercent": 32.50000000000001, + "p2pRate": 183613221.16686425, "rateError": "" }, "openmp": { @@ -857,24 +857,24 @@ Cs.call=ma.call,Cs.empty=ma.empty,Cs.node=ma.node,Cs.size=ma.size,$o.transition= }, "cpu": { "advice": "This is <span class=\"summary_class\">very high</span>; check the CPU performance section for advice", - "percent": 96.75925925925927 + "percent": 95.18072289156626 }, "io": { "advice": "This is <span class=\"summary_class\">very low</span>; however single-process I/O may cause MPI wait times", - "percent": 2.7777777777777777 + "percent": 1.8072289156626506 }, "mpi": { "advice": "This is <span class=\"summary_class\">very low</span>; this code may benefit from a higher process count", - "percent": 0.46296296296296297 + "percent": 3.0120481927710845 }, "systemLoadAvgError": "", - "systemLoadPercent": 140.2054398148148 + "systemLoadPercent": 21.81617093373494 }, "report": { "name": "com.allinea.performancereports.default", "src": { "definition": "com.allinea.performancereports.default (file:///cosma/local/arm/ddt/20.2.1/performance-reports/templates/report-default.xml)\n", - "values": "allinea:/swift_mpi_1p_1n_0t_2021-01-21_15-42" + "values": "allinea:/swift_mpi_2p_2n_0t_2021-01-21_16-09" }, "version": { "api": 1, @@ -883,10 +883,10 @@ Cs.call=ma.call,Cs.empty=ma.empty,Cs.node=ma.node,Cs.size=ma.size,$o.transition= }, "threads": { "advice1": "Significant time is spent <span class=\"threads.syncPercent\">synchronizing</span> threads. Check which locks cause the most overhead with a profiler.", - "advice2": "Multiple <span class=\"threads.physicalCoreUtilizationPercent\">threads per physical core</span> may be causing contention. Using fewer threads may improve performance for some codes.", - "computePercent": 53.728166740055784, - "physicalCoreUtilizationPercent": 197.265625, - "syncPercent": 46.27183325994422 + "advice2": "This may be a sign of overly fine-grained parallelism or of workload imbalance between threads.", + "computePercent": 6.434373918955995, + "physicalCoreUtilizationPercent": 197.77626129518075, + "syncPercent": 93.56562608104402 }, "version": 4 }, diff --git a/report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.txt b/report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.txt new file mode 100644 index 0000000..6f74bd1 --- /dev/null +++ b/report/day1/benchmark-fast/swift_mpi_2p_2n_2021-01-21_16-09.txt @@ -0,0 +1,65 @@ +Command: /cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim_debug/examples/swift_mpi --hydro -v 1 --threads=64 -n 10000 -P Restarts:enable:0 sodShock.yml +Resources: 2 nodes (32 physical, 64 logical cores per node) +Memory: 503 GiB per node +Tasks: 2 processes +Machine: b107.pri.cosma7.alces.network +Start time: Thu Jan 21 16:09:31 2021 +Total time: 167 seconds (about 3 minutes) +Full path: /cosma/home/ds007/dc-fraw1/performance_analysis_workshop/swift-cs-performance-workshop-2021/benchmark-fast/swiftsim_debug/examples + +Summary: swift_mpi is Compute-bound in this configuration +Compute: 95.2% |=========| +MPI: 3.0% || +I/O: 1.8% || +This application run was Compute-bound. A breakdown of this time and advice for investigating further is in the CPU section below. +As very little time is spent in MPI calls, this code may also benefit from running at larger scales. + +CPU: +A breakdown of the 95.2% CPU time: +Scalar numeric ops: 2.5% || +Vector numeric ops: <0.1% || +Memory accesses: 97.5% |=========| +The per-core performance is memory-bound. Use a profiler to identify time-consuming loops and check their cache performance. +No time is spent in vectorized instructions. Check the compiler's vectorization advice to see why key loops could not be vectorized. + +MPI: +A breakdown of the 3.0% MPI time: +Time in collective calls: 67.5% |======| +Time in point-to-point calls: 32.5% |==| +Effective process collective rate: 9.67 MB/s +Effective process point-to-point rate: 184 MB/s +Most of the time is spent in collective calls with a very low transfer rate. This suggests load imbalance is causing synchronization overhead; use an MPI profiler to investigate. + +I/O: +A breakdown of the 1.8% I/O time: +Time in reads: 37.5% |===| +Time in writes: 62.5% |=====| +Effective process read rate: 33.3 GB/s +Effective process write rate: 20.0 GB/s +Most of the time is spent in write operations with a very high effective transfer rate. This suggests asynchronous file operations are successfully reducing application time spent waiting. To improve further, try reducing the amount of data transfer required. + +Threads: +A breakdown of how multiple threads were used: +Computation: 6.4% || +Synchronization: 93.6% |========| +Physical core utilization: 197.8% |===================| +System load: 21.8% |=| +Significant time is spent synchronizing threads. Check which locks cause the most overhead with a profiler. +This may be a sign of overly fine-grained parallelism or of workload imbalance between threads. + +Memory: +Per-process memory usage may also affect scaling: +Mean process memory usage: 349 MiB +Peak process memory usage: 479 MiB +Peak node memory usage: 1.0% || +The peak node memory usage is very low. Running with fewer MPI processes and more data on each process may be more efficient. + +Energy: +A breakdown of how energy was used: +CPU: not supported +System: not supported +Mean node power: not supported +Peak node power: 0.00 W +Energy metrics are not available on this system. +CPU metrics are not supported (no intel_rapl module) + -- GitLab