forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 0
144 lines (126 loc) · 8.87 KB
/
Copy pathstandalone-benchmark.yml
File metadata and controls
144 lines (126 loc) · 8.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Builds, validates, benchmarks and profiles the GPU standalone tool on
# several GPU runners.
name: Standalone Benchmark

# Triggers: manual dispatch, every pull request, and pushes to any branch.
on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - '**'
jobs:
  # One job instance per GPU target in the matrix. Each instance:
  #   1. downloads the reference output and event data,
  #   2. builds in deterministic mode and compares the result to the reference,
  #   3. rebuilds in non-deterministic mode and runs the timing benchmark,
  #   4. runs the vendor profiler selected for that target,
  #   5. uploads the CSV results and renders them against a baseline.
  benchmark:
    runs-on: ${{ matrix.runner }}
    # Image is pinned by digest so builds do not change under a moving tag.
    container: registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
    strategy:
      # Keep the other GPU targets running when one of them fails.
      fail-fast: false
      matrix:
        name: [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
        # Per-target runner label and CMake backend/architecture flags.
        include:
          - name: nvidia-h100
            runner: cern-nextgen-h100
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
          - name: nvidia-l40s
            runner: cern-nextgen-l40s
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
          - name: amd-mi300x
            runner: cern-nextgen-mi300x
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
          - name: amd-w7900
            runner: cern-nextgen-w7900
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
    env:
      WORK_DIR: /cvmfs/alice.cern.ch
      ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
      MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
      # Single source of truth for the modules loaded by every script below.
      # It is expanded unquoted on purpose: word splitting hands each entry to
      # `module load` as a separate argument.
      STANDALONE_MODULES: >-
        ninja/fortran-v1.11.1.g9-15
        Vc/1.4.5-10
        boost/v1.83.0-alice2-57
        fmt/11.1.2-14
        CMake/v3.31.6-10
        ms_gsl/4.2.1-3
        Clang/v20.1.7-9
        TBB/v2022.3.0-3
        ROOT/v6-36-04-alice9-15
        ONNXRuntime/v1.22.0-71
        GLFW/3.3.2-25
      # Install prefix of the standalone build; also holds the downloaded
      # reference output and the event data.
      STANDALONE_DIR: /root/standalone
      # Result file names, both written under /root and picked up by the
      # artifact upload.
      BENCHMARK_CSV: ${{ matrix.name }}.csv
      PROFILER_CSV: results_${{ matrix.name }}.csv
      # Common command prefix for the benchmark and profiler runs.
      # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
      TIMING_CA: ./ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --debug 1
      LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
    name: ${{ matrix.name }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      # Fetches the reference output used by the determinism check and the two
      # event archives (o2-simple for the check, 50kHz for timing runs).
      - name: Download Files
        run: |
          mkdir -p ${STANDALONE_DIR}
          curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
          mkdir -p ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
          tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
          tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events

      # The script is anchored (&build) and reused verbatim by the
      # non-deterministic build; only DETERMINISTIC_MODE differs.
      - name: Build Deterministic
        run: &build |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          mkdir -p ${STANDALONE_DIR}
          cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
          cmake --build ${STANDALONE_DIR}/build --target install -j 8
        env:
          DETERMINISTIC_MODE: GPU

      # Runs the deterministic build on the o2-simple events and compares
      # GPU.out with the downloaded reference via `cmp`. The reference, the
      # o2-simple events and the build tree are then removed ahead of the
      # rebuild.
      - name: Test GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
          cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
          rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build

      - name: Build Non-Deterministic
        run: *build
        env:
          # Quoted: a bare OFF is a boolean under YAML 1.1 loaders, and the
          # literal string "OFF" must reach CMake.
          DETERMINISTIC_MODE: 'OFF'

      # Timed runs on the 50kHz data set written to BENCHMARK_CSV, which
      # merge_runs.py then rewrites in place (called with --discard 2).
      - name: Benchmark GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          ${TIMING_CA} --debug 1 --runs 42 --runsInit 2 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}

      # Exactly one profiler step runs per target: Nsight Compute on
      # nvidia-h100, Nsight Systems on nvidia-l40s, rocprofv2 on both AMD
      # targets. Each one writes PROFILER_CSV, removes the 50kHz events and
      # the build tree, and post-processes the CSV in place.
      - name: Profiler - Nsight Compute
        if: ${{ matrix.name == 'nvidia-h100' }}
        run: |
          dnf install -y cuda-nsight-compute-13-1
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 21 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
          ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --runs 21 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}

      - name: Profiler - Nsight Systems
        if: ${{ matrix.name == 'nvidia-l40s' }}
        run: |
          dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/"
          dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.nsys-rep
          nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}

      - name: Profiler - rocprofv2
        if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}

      # Publishes every CSV found directly under /root.
      - name: Upload Artifact
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.name }}-artifact
          path: '/root/*.csv'

      # Downloads the baseline CSVs for this target and appends two tables
      # produced by csv_to_md.py (profiler first, then benchmark) to the job
      # summary.
      - name: Display table on GitHub web
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          mkdir -p ${STANDALONE_DIR}/baseline
          curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
          curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
          echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
          rm -rf ${STANDALONE_DIR}/baseline