@@ -17,23 +17,36 @@ jobs:
1717 name : [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
1818 include :
1919 - name : nvidia-h100
20+ vendor : nvidia
2021 runner : cern-nextgen-h100
2122 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
23+ profiler : nsys profile -o nvidia-h100
24+ profiler_post : nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-h100.nsys-rep >
2225 - name : nvidia-l40s
26+ vendor : nvidia
2327 runner : cern-nextgen-l40s
2428 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
29+ profiler : nsys profile -o nvidia-l40s
30+ profiler_post : nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-l40s.nsys-rep >
2531 - name : amd-mi300x
32+ vendor : amd
2633 runner : cern-nextgen-mi300x
2734 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
35+ profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-mi300x
36+ profiler_post : touch
2837 - name : amd-w7900
38+ vendor : amd
2939 runner : cern-nextgen-w7900
3040 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
41+ profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-w7900
42+ profiler_post : touch
3143 env :
3244 WORK_DIR : /cvmfs/alice.cern.ch
3345 ALIBUILD_ARCH_PREFIX : el9-x86_64/Packages
3446 MODULEPATH : /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
3547 STANDALONE_DIR : /root/standalone
36- BENCHMARK_CSV : /root/${{ matrix.name }}.csv
48+ BENCHMARK_CSV : ${{ matrix.name }}.csv
49+ PROFILER_CSV : results_${{ matrix.name }}.csv
3750 LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
3851
3952 name : ${{ matrix.name }}
@@ -44,11 +57,18 @@ jobs:
4457 - name : Download Files
4558 run : |
4659 mkdir -p ${STANDALONE_DIR}
60+
61+ if [[ "${{ matrix.vendor }}" == "nvidia" ]]; then
62+ curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
63+ dnf install -y ${STANDALONE_DIR}/nsys.rpm
64+ rm -f ${STANDALONE_DIR}/nsys.rpm
65+ fi
4766
4867 curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
4968
5069 mkdir -p ${STANDALONE_DIR}/baseline
51- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${{ matrix.name }}.csv https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${{ matrix.name }}.csv
70+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
71+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
5272
5373 mkdir -p ${STANDALONE_DIR}/events
5474 curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
@@ -87,19 +107,23 @@ jobs:
87107 source /etc/profile.d/modules.sh
88108 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
89109 cd ${STANDALONE_DIR}
90- ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --runs 12 --debug 1 --PROCtimingCSV ${BENCHMARK_CSV}
110+ ${{ matrix.profiler }} ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --debug 1 --runs 12 --runsInit 2 --PROCresetTimers 1 --PROCtimingCSV /root/${BENCHMARK_CSV}
111+ ${{ matrix.profiler_post }} /root/${PROFILER_CSV}
91112 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
92113
93114 - name : Display table on GitHub web
94115 run : |
95116 source /etc/profile.d/modules.sh
96117 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
97- python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input ${BENCHMARK_CSV} --output ${BENCHMARK_CSV}
98- python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${{ matrix.name }}.csv --current ${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
118+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 12 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
119+ python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
120+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
121+ echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
122+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
99123 rm -rf ${STANDALONE_DIR}/baseline
100124
101125 - name : Upload Artifact
102126 uses : actions/upload-artifact@v6
103127 with :
104128 name : ${{ matrix.name }}-artifact
105- path : /root/${{ matrix.name }}.csv
129+ path : "/root/*.csv"
0 commit comments