Skip to content

Commit 2d00da7

Browse files
Fix bug about determinism
1 parent 3670472 commit 2d00da7

4 files changed

Lines changed: 37 additions & 24 deletions

File tree

.github/workflows/standalone-benchmark.yml

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,28 @@ on:
1010
jobs:
1111
benchmark:
1212
runs-on: ${{ matrix.runner }}
13-
container: registry.cern.ch/alisw/slc9-gpu-builder:latest
13+
container: registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
1414
strategy:
15+
fail-fast: false
1516
matrix:
1617
name: [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
1718
include:
1819
- name: nvidia-h100
1920
runner: cern-nextgen-h100
20-
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DENABLE_OPENCL=0 -DCUDA_COMPUTETARGET=90
21-
ca_args: --gpuType CUDA --gpuDevice 0
21+
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
22+
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-h100.par
2223
- name: nvidia-l40s
2324
runner: cern-nextgen-l40s
24-
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DENABLE_OPENCL=0 -DCUDA_COMPUTETARGET=89
25-
ca_args: --gpuType CUDA --gpuDevice 0
25+
cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
26+
ca_args: --gpuType CUDA #--RTCTECHloadLaunchBoundsFromFile genGPUArch/nvidia-l40s.par
2627
- name: amd-mi300x
2728
runner: cern-nextgen-mi300x
28-
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DENABLE_OPENCL=0 -DHIP_AMDGPUTARGET=gfx942
29-
ca_args: --gpuType HIP --gpuDevice 0
29+
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
30+
ca_args: --gpuType HIP
3031
- name: amd-w7900
3132
runner: cern-nextgen-w7900
32-
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DENABLE_OPENCL=0 -DHIP_AMDGPUTARGET=gfx1100
33-
ca_args: --gpuType HIP --gpuDevice 0
33+
cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
34+
ca_args: --gpuType HIP --RTCTECHloadLaunchBoundsFromFile genGPUArch/amd-w7900.par
3435

3536
name: ${{ matrix.name }}
3637
steps:
@@ -39,27 +40,41 @@ jobs:
3940

4041
- name: Build and Run
4142
run: |
42-
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/O2/${O2_REVISION}/etc/profile.d/init.sh
43-
export ROOT_INCLUDE_PATH=$(echo "$ROOT_INCLUDE_PATH" | cut -d: -f3-)
44-
4543
mkdir -p ${STANDALONE_DIR}
46-
curl -o /root/events.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/cuQAwSojyDrl6FR/events.tar.xz
47-
tar -xf /root/events.tar.xz -C ${STANDALONE_DIR}
48-
rm /root/events.tar.xz
44+
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/ninja-fortran/fortran-v1.11.1.g9-3/etc/profile.d/init.sh
45+
. ${WORK_DIR}/${ALIBUILD_ARCH_PREFIX}/O2/${O2_REVISION}/etc/profile.d/init.sh
4946
50-
cmake -B ${BUILD_DIR} ${{ matrix.cmake_args }} -DGPUCA_BUILD_EVENT_DISPLAY=0 -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
47+
cmake -B ${BUILD_DIR} ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=GPU -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
5148
cd ${BUILD_DIR}
5249
make install -j8
50+
5351
cd ${STANDALONE_DIR}
54-
${STANDALONE_DIR}/ca -e o2-simple -g ${{ matrix.ca_args }} --debug 1 > ${ARTIFACT_FILE}
55-
cat ${ARTIFACT_FILE}
52+
mkdir -p ${STANDALONE_DIR}/genGPUArch
53+
curl -v -o ${STANDALONE_DIR}/genGPUArch/${{ matrix.name }}.par https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/genGPUArch/${{ matrix.name }}.par
54+
55+
mkdir -p ${STANDALONE_DIR}/events
56+
57+
curl -v -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
58+
tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events
59+
${STANDALONE_DIR}/ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 1 ${{ matrix.ca_args }} > ${ARTIFACT_FILE}
60+
61+
curl -v -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
62+
tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
63+
${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptSpecialCode 1 --debug 6 ${{ matrix.ca_args }}
64+
65+
curl -v -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
66+
cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
67+
rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
68+
69+
rm -rf ${STANDALONE_DIR}/events
5670
env:
5771
WORK_DIR: /cvmfs/alice.cern.ch
5872
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
5973
O2_REVISION: daily-20260217-0000-1
6074
STANDALONE_DIR: /root/standalone
6175
BUILD_DIR: /root/standalone/build
6276
ARTIFACT_FILE: /root/artifact.txt
77+
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
6378

6479
- name: Upload Artifact
6580
uses: actions/upload-artifact@v4

GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,7 @@ GPUd() void GPUTPCGMMerger::PrepareForFit1(int32_t nBlocks, int32_t nThreads, in
18521852
if (CAMath::Abs(trk.GetParam().GetQPt() * Param().qptB5Scaler) <= Param().rec.tpc.rejectQPtB5 && !trk.MergedLooper() && trk.Leg() == 0) {
18531853
weight |= attachProtect;
18541854
}
1855-
mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = weight;
1855+
CAMath::AtomicMax(&mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num], weight);
18561856
CAMath::AtomicAdd(&mSharedCount[mClusters[trk.FirstClusterRef() + j].num], 1u);
18571857
}
18581858
if (!trk.CCE() && !trk.MergedLooper()) {

GPU/GPUTracking/Standalone/cmake/config.cmake

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ set(GPUCA_CONFIG_GL3W 0)
2828
set(GPUCA_CONFIG_O2 1)
2929
set(GPUCA_BUILD_DEBUG 0)
3030
set(GPUCA_BUILD_DEBUG_SANITIZE 0)
31-
set(GPUCA_BUILD_DEBUG_HOSTONLY 0)
32-
set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2
31+
set(GPUCA_DETERMINISTIC_MODE 0 CACHE STRING "GPUCA_DETERMINISTIC_MODE") # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2
3332
#set(GPUCA_CUDA_GCCBIN c++-14)
3433
#set(GPUCA_OPENCL_CLANGBIN clang-20)
3534
set(HIP_AMDGPUTARGET "default" CACHE STRING "HIP_AMDGPUTARGET") # "gfx906;gfx908;gfx90a"
@@ -41,4 +40,3 @@ set(CUDA_COMPUTETARGET "default" CACHE STRING "CUDA_COMPUTETARGET") # 86 89
4140
#set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang
4241
#set(GPUCA_CONFIG_WERROR 1)
4342
#add_definitions(-DGPUCA_GPU_DEBUG_PRINT)
44-
#set(GPUCA_OVERRIDE_PARAMETER_FILE "foo.csv")

dependencies/FindO2GPU.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ function(detect_gpu_arch backend) # Detect GPU architecture, optionally filterri
7676
string(TOLOWER "${HIP_FIRST_TARGET}" HIP_FIRST_TARGET)
7777
string(REGEX MATCH "....$" HIP_FIRST_TARGET_PADDED "0000${HIP_FIRST_TARGET}")
7878
if(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "1000")
79-
set(HIP_TARGET RDNA)
79+
set(HIP_TARGET MI100)
8080
elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "090a")
81-
set(HIP_TARGET MI210)
81+
set(HIP_TARGET MI100)
8282
elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "0908")
8383
set(HIP_TARGET MI100)
8484
elseif(HIP_FIRST_TARGET_PADDED STRGREATER_EQUAL "0906")

0 commit comments

Comments
 (0)