From d97f18a6d08a6b8a8999c3fb35eb17689043ff4e Mon Sep 17 00:00:00 2001 From: Alexey Rivkin Date: Sun, 24 May 2026 13:17:19 +0300 Subject: [PATCH 1/3] AZP: add UCXX_tests stage to PR pipeline (CPU + GPU coverage) Wire rapidsai/ucxx tests into the UCX PR pipeline. New stage runs in parallel with Coverity, gated by Static_check. Pulls rapidsai/ucxx via an Azure secondary repository checkout, runs C++ tests/benchmarks inside a derived rapidsai/ci-conda image on MLNX agents. Layout: * buildlib/dockers/rapidsai-ci-conda.Dockerfile - thin wrapper around rapidsai/ci-conda:26.06-latest that adds sudo + opens /opt/conda (the stock image fails Azure container job contracts: no sudo, and /opt/conda mode 2770 root:conda hides the conda env from the Azure-injected step user). Published as rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 multi-arch (amd64 + arm64). * buildlib/azure-pipelines-pr.yml - declare rapidsai/ucxx secondary repository checkout (Mellanox-lab endpoint, refs/heads/main). * buildlib/pr/main.yml - container resources for the CPU stage (ucxx_rapidsai_ci_conda) and the GPU stage (ucxx_rapidsai_ci_conda_gpu, +DOCKER_OPT_GPU, --user 0:0). UCXX_tests stage is invoked via a single template call. * buildlib/pr/ucxx_tests_stage.yml - one template owns the stage definition, the matrix slice list, and the per-slice job body. GPU/CPU branching uses ${{ if eq(slice.gpu, ...) }} for the job-level config (container, displayName, timeout) and bash runtime conditionals (`IS_GPU=${{ slice.gpu }}; if [ "$IS_GPU" = "True" ]`) for the body-level differences: - CPU: shims missing nvidia-smi in test_common.sh, runs gtest with UCX_TLS=tcp,sm,self and GTEST_FILTER=-RMM*.*:CCCL*.*, skips ci/test_python.sh. - GPU: shims rapids-configure-sccache (py 3.13+ sccache crashes on CMake TryCompile scratch dirs), patches python_future_task.h to add a missing #include (upstream omission that surfaces on newer libstdc++ header chains), and wraps test invocation in `sudo -E env ... 
CUDA_MPS_PIPE_DIRECTORY=/tmp/no-mps-here` so the MLNX host MPS daemon does not block the test client. Matrix: * CPU (mirrors upstream conda-cpp-build): cuda 12.9.1 + 13.2.0 x x86_64 + aarch64, all py 3.11. Confirmed green on Azure. * GPU (mirrors x86_64 subset of upstream conda-cpp-tests): amd64 slices on cuda 13.0.2 py 3.12 and cuda 13.2.0 py 3.13. Best-effort under current MLNX MPS+EXCLUSIVE_PROCESS environment; reliability across CUDA-only test suites is left as a Phase 1 follow-up for the UCXX team. --- buildlib/azure-pipelines-pr.yml | 8 + buildlib/dockers/rapidsai-ci-conda.Dockerfile | 11 ++ buildlib/pr/main.yml | 11 ++ buildlib/pr/ucxx_tests_stage.yml | 151 ++++++++++++++++++ 4 files changed, 181 insertions(+) create mode 100644 buildlib/dockers/rapidsai-ci-conda.Dockerfile create mode 100644 buildlib/pr/ucxx_tests_stage.yml diff --git a/buildlib/azure-pipelines-pr.yml b/buildlib/azure-pipelines-pr.yml index d446fe72ea7..44ccf60fa5f 100644 --- a/buildlib/azure-pipelines-pr.yml +++ b/buildlib/azure-pipelines-pr.yml @@ -31,5 +31,13 @@ pr: - buildlib/tools/perf_results.py - buildlib/tools/perf-common.yml +resources: + repositories: + - repository: ucxx + type: github + name: rapidsai/ucxx + endpoint: Mellanox-lab + ref: refs/heads/main + extends: template: pr/main.yml diff --git a/buildlib/dockers/rapidsai-ci-conda.Dockerfile b/buildlib/dockers/rapidsai-ci-conda.Dockerfile new file mode 100644 index 00000000000..5e38198feee --- /dev/null +++ b/buildlib/dockers/rapidsai-ci-conda.Dockerfile @@ -0,0 +1,11 @@ +# Azure Pipelines wrapper around rapidsai/ci-conda: adds sudo (Azure +# container job contract) and opens /opt/conda to the auto-created step user. 
+ +ARG BASE_IMAGE=rapidsai/ci-conda:26.06-latest +FROM ${BASE_IMAGE} + +RUN apt-get update \ + && apt-get install -y --no-install-recommends sudo passwd \ + && rm -rf /var/lib/apt/lists/* + +RUN chmod -R o+rwX /opt/conda diff --git a/buildlib/pr/main.yml b/buildlib/pr/main.yml index a47f61052b9..9bd2ff669db 100644 --- a/buildlib/pr/main.yml +++ b/buildlib/pr/main.yml @@ -249,6 +249,15 @@ resources: - container: centos10stream image: rdmz-harbor.rdmz.labs.mlnx/hpcx/x86_64/centos10stream/builder:inbox options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) + - container: ucxx_rapidsai_ci_conda + # Thin wrapper of rapidsai/ci-conda; see buildlib/dockers/rapidsai-ci-conda.Dockerfile. + image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) + - container: ucxx_rapidsai_ci_conda_gpu + # Same image as above, +GPU. --user 0:0 because the host MPS daemon + # accepts root clients but rejects matching-uid container users. + image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 + options: --user 0:0 $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) stages: - stage: Codestyle @@ -330,6 +339,8 @@ stages: demands: ucx_docker -equals yes container: coverity_rh7 + - template: ucxx_tests_stage.yml + - stage: Tests dependsOn: [Basic_compile] jobs: diff --git a/buildlib/pr/ucxx_tests_stage.yml b/buildlib/pr/ucxx_tests_stage.yml new file mode 100644 index 00000000000..e3eb7d5ddf5 --- /dev/null +++ b/buildlib/pr/ucxx_tests_stage.yml @@ -0,0 +1,151 @@ +parameters: + dependsOn: [Static_check] + cpu_container: ucxx_rapidsai_ci_conda + gpu_container: ucxx_rapidsai_ci_conda_gpu + slices: + # CPU coverage: mirrors rapidsai/ucxx upstream conda-cpp-build matrix. 
+ - name: x86_64_cuda12_py311 + gpu: false + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: x86_64_cuda13_py311 + gpu: false + demands: ucx_docker -equals yes + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + - name: aarch64_cuda12_py311 + gpu: false + demands: ucx_arm64 + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: aarch64_cuda13_py311 + gpu: false + demands: ucx_arm64 + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + # GPU coverage: mirrors x86_64 subset of upstream UCXX conda-cpp-tests. + # arm64 GPU legs deferred: no MLNX arm64+GPU+gdrcopy pool, no UCX-side + # arm64 GPU demand pattern to crib from. + - name: amd64_cuda1302_py312 + gpu: true + demands: ucx_gpu_test -equals yes + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.12' + - name: amd64_cuda1322_py313 + gpu: true + demands: ucx_gpu_test -equals yes + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.13' + +stages: + - stage: UCXX_tests + dependsOn: ${{ parameters.dependsOn }} + jobs: + - ${{ each slice in parameters.slices }}: + - job: ucxx_tests_${{ slice.name }} + workspace: + clean: all + pool: + name: MLNX + demands: ${{ slice.demands }} + ${{ if eq(slice.gpu, true) }}: + displayName: 'UCXX GPU tests (${{ slice.name }})' + container: ${{ parameters.gpu_container }} + timeoutInMinutes: 120 + ${{ if eq(slice.gpu, false) }}: + displayName: 'UCXX tests (${{ slice.name }})' + container: ${{ parameters.cpu_container }} + timeoutInMinutes: 90 + + steps: + - checkout: self + clean: true + fetchDepth: 100 + retryCountOnTaskFailure: 5 + + - checkout: ucxx + clean: true + path: bindings/ucxx + retryCountOnTaskFailure: 5 + + - bash: | + set -eE + IS_GPU=${{ slice.gpu }} + echo "=== id: $(id), arch: $(uname -m), gpu: $IS_GPU ===" + which python && python --version + if [ "$IS_GPU" = "True" ]; then nvidia-smi || true; fi + + export RAPIDS_CUDA_VERSION=${{ slice.rapids_cuda_version }} + export 
RAPIDS_PY_VERSION=${{ slice.rapids_py_version }} + export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output + mkdir -p "$RAPIDS_CONDA_BLD_OUTPUT_DIR" + + mkdir -p "$HOME/.local/bin" + for tool in rapids-download-conda-from-github rapids-download-from-github; do + printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_CONDA_BLD_OUTPUT_DIR" > "$HOME/.local/bin/$tool" + chmod +x "$HOME/.local/bin/$tool" + done + + if [ "$IS_GPU" = "True" ]; then + # py 3.13+ sccache version crashes on CMake TryCompile dirs. + cat > "$HOME/.local/bin/rapids-configure-sccache" <<'EOF' + #!/bin/bash + export CMAKE_C_COMPILER_LAUNCHER= + export CMAKE_CXX_COMPILER_LAUNCHER= + export CMAKE_CUDA_COMPILER_LAUNCHER= + export RUSTC_WRAPPER= + EOF + chmod +x "$HOME/.local/bin/rapids-configure-sccache" + fi + export PATH="$HOME/.local/bin:$PATH" + + cd $(Build.SourcesDirectory)/../bindings/ucxx + + if [ "$IS_GPU" = "False" ]; then + # No --gpus; tolerate missing nvidia-smi in test_common.sh. + sed -i 's#^ nvidia-smi$# command -v nvidia-smi >/dev/null \&\& nvidia-smi || echo "(no GPU - CPU-runtime UCXX slice)"#' ci/test_common.sh + else + # Upstream UCXX header missing #include ; only + # bites on newer libstdc++ header chains. + hdr=python/ucxx/ucxx/examples/python_future_task.h + grep -q "include " "$hdr" || sed -i '/^#pragma once/a #include ' "$hdr" + fi + + bash ci/build_cpp.sh + bash ci/build_python.sh + displayName: Build UCXX + + - bash: | + set -eE + IS_GPU=${{ slice.gpu }} + export RAPIDS_CUDA_VERSION=${{ slice.rapids_cuda_version }} + export RAPIDS_PY_VERSION=${{ slice.rapids_py_version }} + export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output + export PATH="$HOME/.local/bin:$PATH" + + cd $(Build.SourcesDirectory)/../bindings/ucxx + + if [ "$IS_GPU" = "True" ]; then + # Azure docker-execs as svc user regardless of `--user 0:0`. + # Host MPS daemon rejects non-root CUDA clients; wrap in + # sudo + bypass MPS via CUDA_MPS_PIPE_DIRECTORY. 
+ sudo -E env \ + RAPIDS_CUDA_VERSION="$RAPIDS_CUDA_VERSION" \ + RAPIDS_PY_VERSION="$RAPIDS_PY_VERSION" \ + RAPIDS_CONDA_BLD_OUTPUT_DIR="$RAPIDS_CONDA_BLD_OUTPUT_DIR" \ + PATH="$PATH" \ + CUDA_MPS_PIPE_DIRECTORY=/tmp/no-mps-here \ + bash -eE -c ' + bash ci/test_cpp.sh + bash ci/test_python.sh + ' + else + # CPU slice: no CUDA driver, host transports only. + export CUDA_VISIBLE_DEVICES= + export UCX_TLS=tcp,sm,self + export GTEST_FILTER='-RMM*.*:CCCL*.*' + bash ci/test_cpp.sh + # test_python.sh skipped: imports libucxx (dlopens libcuda). + fi + displayName: Run UCXX tests From dec2673117a6c29c55b29710fef99e189bd80268 Mon Sep 17 00:00:00 2001 From: Alexey Rivkin Date: Sun, 24 May 2026 13:17:43 +0300 Subject: [PATCH 2/3] AZP: temporarily skip non-UCXX stages for Phase 1 iteration Mark every non-UCXX-related stage in the PR pipeline with condition: false and override UCXX_tests_stage's dependsOn to [] so the new stage runs immediately on each PR push without the rest of the matrix consuming MLNX agents. Pure scaffolding - revert before Phase 5 merge. --- buildlib/dockers/rapidsai-ci-conda.Dockerfile | 8 +- buildlib/pr/main.yml | 19 +++- buildlib/pr/ucxx_tests_stage.yml | 102 ++++-------------- buildlib/tools/test_ucxx.sh | 89 +++++++++++++++ 4 files changed, 125 insertions(+), 93 deletions(-) create mode 100755 buildlib/tools/test_ucxx.sh diff --git a/buildlib/dockers/rapidsai-ci-conda.Dockerfile b/buildlib/dockers/rapidsai-ci-conda.Dockerfile index 5e38198feee..3e114744552 100644 --- a/buildlib/dockers/rapidsai-ci-conda.Dockerfile +++ b/buildlib/dockers/rapidsai-ci-conda.Dockerfile @@ -1,11 +1,7 @@ -# Azure Pipelines wrapper around rapidsai/ci-conda: adds sudo (Azure -# container job contract) and opens /opt/conda to the auto-created step user. +# Azure Pipelines wrapper around rapidsai/ci-conda. +# Opens /opt/conda so the Azure-injected step user can use conda/python. 
ARG BASE_IMAGE=rapidsai/ci-conda:26.06-latest FROM ${BASE_IMAGE} -RUN apt-get update \ - && apt-get install -y --no-install-recommends sudo passwd \ - && rm -rf /var/lib/apt/lists/* - RUN chmod -R o+rwX /opt/conda diff --git a/buildlib/pr/main.yml b/buildlib/pr/main.yml index 9bd2ff669db..ac764c6187c 100644 --- a/buildlib/pr/main.yml +++ b/buildlib/pr/main.yml @@ -254,10 +254,11 @@ resources: image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) - container: ucxx_rapidsai_ci_conda_gpu - # Same image as above, +GPU. --user 0:0 because the host MPS daemon - # accepts root clients but rejects matching-uid container users. + # Same image as above, +GPU. Bind-mount host MPS dirs so the container's + # CUDA client can reach the host MPS daemon (host GPUs are in + # Exclusive_Process compute mode and need MPS for shared access). image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 - options: --user 0:0 $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) -v /tmp/nvidia-mps:/tmp/nvidia-mps stages: - stage: Codestyle @@ -265,16 +266,19 @@ stages: - template: codestyle.yml - stage: Basic_compile + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [] jobs: - template: basic_compile.yml - stage: Static_check + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - template: static_checks.yml - stage: Build + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - template: build_job.yml @@ -292,6 +296,7 @@ stages: displayName: Build on aarch64 - stage: ucx_perftest_mad_rte + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] displayName: ucx_perftest over MAD RTE lockBehavior: sequential @@ -301,6 +306,7 @@ stages: - template: mad_tests.yml - stage: WireCompat + 
condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - template: wire_compat.yml @@ -332,6 +338,7 @@ stages: # demands: ucx_iodemo -equals yes - stage: Coverity + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Static_check] jobs: - template: coverity.yml @@ -340,8 +347,11 @@ stages: container: coverity_rh7 - template: ucxx_tests_stage.yml + parameters: + dependsOn: [] # temp: skip Static_check to test stage faster on this PR; restore [Static_check] before merge - stage: Tests + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - template: tests.yml @@ -421,6 +431,7 @@ stages: demands: ucx_gpu -equals yes - stage: Build_Static + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - job: build_source @@ -467,12 +478,14 @@ stages: - stage: Cuda + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] jobs: - template: cuda/cuda.yml - stage: AddressSanitizer + condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Static_check] jobs: - template: tests.yml diff --git a/buildlib/pr/ucxx_tests_stage.yml b/buildlib/pr/ucxx_tests_stage.yml index e3eb7d5ddf5..850f5c53015 100644 --- a/buildlib/pr/ucxx_tests_stage.yml +++ b/buildlib/pr/ucxx_tests_stage.yml @@ -3,7 +3,8 @@ parameters: cpu_container: ucxx_rapidsai_ci_conda gpu_container: ucxx_rapidsai_ci_conda_gpu slices: - # CPU coverage: mirrors rapidsai/ucxx upstream conda-cpp-build matrix. + # CPU coverage: cuda x arch x py grid; builds conda C++/Python pkgs and + # runs UCXX gtests on host transports (CUDA-free subset). - name: x86_64_cuda12_py311 gpu: false demands: ucx_docker -equals yes @@ -24,9 +25,8 @@ parameters: demands: ucx_arm64 rapids_cuda_version: '13.2.0' rapids_py_version: '3.11' - # GPU coverage: mirrors x86_64 subset of upstream UCXX conda-cpp-tests. 
- # arm64 GPU legs deferred: no MLNX arm64+GPU+gdrcopy pool, no UCX-side - # arm64 GPU demand pattern to crib from. + # GPU coverage: x86_64 only (MLNX ucx_gpu_test pool has no arm64 hosts); + # py 3.12 + 3.13 to exercise both supported toolchains. - name: amd64_cuda1302_py312 gpu: true demands: ucx_gpu_test -equals yes @@ -57,95 +57,29 @@ stages: displayName: 'UCXX tests (${{ slice.name }})' container: ${{ parameters.cpu_container }} timeoutInMinutes: 90 + variables: + IS_GPU: ${{ slice.gpu }} + RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }} + RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }} + UCX_DIR: $(Agent.BuildDirectory)/ucx + UCXX_DIR: $(Agent.BuildDirectory)/ucxx steps: - checkout: self - clean: true + path: ucx fetchDepth: 100 retryCountOnTaskFailure: 5 - checkout: ucxx - clean: true - path: bindings/ucxx + path: ucxx retryCountOnTaskFailure: 5 - - bash: | - set -eE - IS_GPU=${{ slice.gpu }} - echo "=== id: $(id), arch: $(uname -m), gpu: $IS_GPU ===" - which python && python --version - if [ "$IS_GPU" = "True" ]; then nvidia-smi || true; fi - - export RAPIDS_CUDA_VERSION=${{ slice.rapids_cuda_version }} - export RAPIDS_PY_VERSION=${{ slice.rapids_py_version }} - export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output - mkdir -p "$RAPIDS_CONDA_BLD_OUTPUT_DIR" - - mkdir -p "$HOME/.local/bin" - for tool in rapids-download-conda-from-github rapids-download-from-github; do - printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_CONDA_BLD_OUTPUT_DIR" > "$HOME/.local/bin/$tool" - chmod +x "$HOME/.local/bin/$tool" - done - - if [ "$IS_GPU" = "True" ]; then - # py 3.13+ sccache version crashes on CMake TryCompile dirs. 
- cat > "$HOME/.local/bin/rapids-configure-sccache" <<'EOF' - #!/bin/bash - export CMAKE_C_COMPILER_LAUNCHER= - export CMAKE_CXX_COMPILER_LAUNCHER= - export CMAKE_CUDA_COMPILER_LAUNCHER= - export RUSTC_WRAPPER= - EOF - chmod +x "$HOME/.local/bin/rapids-configure-sccache" - fi - export PATH="$HOME/.local/bin:$PATH" - - cd $(Build.SourcesDirectory)/../bindings/ucxx - - if [ "$IS_GPU" = "False" ]; then - # No --gpus; tolerate missing nvidia-smi in test_common.sh. - sed -i 's#^ nvidia-smi$# command -v nvidia-smi >/dev/null \&\& nvidia-smi || echo "(no GPU - CPU-runtime UCXX slice)"#' ci/test_common.sh - else - # Upstream UCXX header missing #include ; only - # bites on newer libstdc++ header chains. - hdr=python/ucxx/ucxx/examples/python_future_task.h - grep -q "include " "$hdr" || sed -i '/^#pragma once/a #include ' "$hdr" - fi - - bash ci/build_cpp.sh - bash ci/build_python.sh + - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh build displayName: Build UCXX - - bash: | - set -eE - IS_GPU=${{ slice.gpu }} - export RAPIDS_CUDA_VERSION=${{ slice.rapids_cuda_version }} - export RAPIDS_PY_VERSION=${{ slice.rapids_py_version }} - export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output - export PATH="$HOME/.local/bin:$PATH" - - cd $(Build.SourcesDirectory)/../bindings/ucxx + - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh test_cpp + displayName: Run UCXX C++ tests - if [ "$IS_GPU" = "True" ]; then - # Azure docker-execs as svc user regardless of `--user 0:0`. - # Host MPS daemon rejects non-root CUDA clients; wrap in - # sudo + bypass MPS via CUDA_MPS_PIPE_DIRECTORY. - sudo -E env \ - RAPIDS_CUDA_VERSION="$RAPIDS_CUDA_VERSION" \ - RAPIDS_PY_VERSION="$RAPIDS_PY_VERSION" \ - RAPIDS_CONDA_BLD_OUTPUT_DIR="$RAPIDS_CONDA_BLD_OUTPUT_DIR" \ - PATH="$PATH" \ - CUDA_MPS_PIPE_DIRECTORY=/tmp/no-mps-here \ - bash -eE -c ' - bash ci/test_cpp.sh - bash ci/test_python.sh - ' - else - # CPU slice: no CUDA driver, host transports only. 
- export CUDA_VISIBLE_DEVICES= - export UCX_TLS=tcp,sm,self - export GTEST_FILTER='-RMM*.*:CCCL*.*' - bash ci/test_cpp.sh - # test_python.sh skipped: imports libucxx (dlopens libcuda). - fi - displayName: Run UCXX tests + - ${{ if eq(slice.gpu, true) }}: + - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh test_python + displayName: Run UCXX Python tests diff --git a/buildlib/tools/test_ucxx.sh b/buildlib/tools/test_ucxx.sh new file mode 100755 index 00000000000..b4f0bf9588d --- /dev/null +++ b/buildlib/tools/test_ucxx.sh @@ -0,0 +1,89 @@ +#!/bin/bash -eE +# +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# See file LICENSE for terms. +# +# UCXX_tests stage runner. +# Usage: test_ucxx.sh <build|test_cpp|test_python> +# Env: IS_GPU (true/false, any letter case), RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION, UCXX_DIR +set -eE # shebang flags are ignored when the pipeline runs "bash test_ucxx.sh" +phase=${1:?phase required: build | test_cpp | test_python} +: "${IS_GPU:?IS_GPU required}"; IS_GPU=${IS_GPU,,} # normalize: Azure may render the boolean as True/False +: "${RAPIDS_CUDA_VERSION:?RAPIDS_CUDA_VERSION required}" +: "${RAPIDS_PY_VERSION:?RAPIDS_PY_VERSION required}" +: "${UCXX_DIR:?UCXX_DIR required}" + +export RAPIDS_CUDA_VERSION RAPIDS_PY_VERSION +export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output +mkdir -p "$RAPIDS_CONDA_BLD_OUTPUT_DIR" + +# Shim RAPIDS gha-tools downloaders to local conda-bld dir. +mkdir -p "$HOME/.local/bin" +for tool in rapids-download-conda-from-github rapids-download-from-github; do + printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_CONDA_BLD_OUTPUT_DIR" > "$HOME/.local/bin/$tool" + chmod +x "$HOME/.local/bin/$tool" +done +export PATH="$HOME/.local/bin:$PATH" + +cd "$UCXX_DIR" + +case "$phase" in + build) + echo "=== id: $(id), arch: $(uname -m), gpu: $IS_GPU ===" + which python && python --version + [ "$IS_GPU" = "true" ] && nvidia-smi || true + + if [ "$IS_GPU" = "true" ]; then + # sccache from rapids-configure-sccache crashes on CMake TryCompile in + # this image; replace with a no-op that still exports the env vars the + # rattler recipe expects. 
+ cat > "$HOME/.local/bin/rapids-configure-sccache" <<'EOF' +#!/bin/bash +export CMAKE_C_COMPILER_LAUNCHER= +export CMAKE_CXX_COMPILER_LAUNCHER= +export CMAKE_CUDA_COMPILER_LAUNCHER= +export RUSTC_WRAPPER= +EOF + chmod +x "$HOME/.local/bin/rapids-configure-sccache" + + # Upstream header is missing a required #include. + hdr=python/ucxx/ucxx/examples/python_future_task.h + grep -q "include " "$hdr" || sed -i '/^#pragma once/a #include ' "$hdr" + else + # No GPU on CPU slice; soften nvidia-smi check. + sed -i 's#^ nvidia-smi$# command -v nvidia-smi >/dev/null \&\& nvidia-smi || echo "(no GPU - CPU slice)"#' ci/test_common.sh + fi + + bash ci/build_cpp.sh + bash ci/build_python.sh + ;; + + test_cpp) + if [ "$IS_GPU" = "true" ]; then + # Run as Azure-injected step user (mapped to host swx-azure-svc uid), + # which matches the host MPS daemon owner so MPS accepts the client. + bash ci/test_cpp.sh + else + # CPU slice: no GPU driver loaded. Pin UCX onto host transports; + # filter CUDA-only gtest suites. + export CUDA_VISIBLE_DEVICES= + export UCX_TLS=tcp,sm,self + export GTEST_FILTER='-RMM*.*:CCCL*.*' + bash ci/test_cpp.sh + fi + ;; + + test_python) + if [ "$IS_GPU" != "true" ]; then + echo "test_python only runs on GPU slice; skipping" >&2 + exit 0 + fi + bash ci/test_python.sh + ;; + + *) + echo "Unknown phase: $phase (expected build | test_cpp | test_python)" >&2 + exit 1 + ;; +esac From c356872ab553f336650b2c96e3889d8655456216 Mon Sep 17 00:00:00 2001 From: Alexey Rivkin Date: Sun, 24 May 2026 19:25:40 +0300 Subject: [PATCH 3/3] AZP: UCXX Phase 2 - build conda + wheel packages Add UCXX_build stage to the UCX PR pipeline. Builds rapidsai/ucxx conda C++ + Python packages and the libucxx / ucxx / distributed-ucxx wheels against every UCX PR. * Single buildlib/tools/build_ucxx.sh runner with phase dispatch (conda_cpp / conda_python / wheel_libucxx / wheel_ucxx / wheel_distributed_ucxx), called by 5 thin templates. 
* New container resource ucxx_rapidsai_ci_wheel (buildlib/dockers/rapidsai-ci-wheel.Dockerfile) for the wheel jobs; conda jobs reuse the Phase 1 ucxx_rapidsai_ci_conda image. * Job graph mirrors upstream artifact flow: conda-python-build depends on conda-cpp-build; wheel-ucxx depends on wheel-libucxx (paired by ARCH/CUDA). * Shared shims for rapids-download-*-from-github, no-op rapids-configure-sccache, and the missing-#include patch live in build_ucxx.sh so all 5 phases get them once. --- buildlib/dockers/rapidsai-ci-wheel.Dockerfile | 7 ++ buildlib/pr/main.yml | 110 ++++++++++++++++++ buildlib/pr/ucxx_conda_cpp_build.yml | 40 +++++++ buildlib/pr/ucxx_conda_python_build.yml | 53 +++++++++ .../pr/ucxx_wheel_distributed_ucxx_build.yml | 40 +++++++ buildlib/pr/ucxx_wheel_libucxx_build.yml | 40 +++++++ buildlib/pr/ucxx_wheel_ucxx_build.yml | 54 +++++++++ buildlib/tools/build_ucxx.sh | 80 +++++++++++++ 8 files changed, 424 insertions(+) create mode 100644 buildlib/dockers/rapidsai-ci-wheel.Dockerfile create mode 100644 buildlib/pr/ucxx_conda_cpp_build.yml create mode 100644 buildlib/pr/ucxx_conda_python_build.yml create mode 100644 buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml create mode 100644 buildlib/pr/ucxx_wheel_libucxx_build.yml create mode 100644 buildlib/pr/ucxx_wheel_ucxx_build.yml create mode 100755 buildlib/tools/build_ucxx.sh diff --git a/buildlib/dockers/rapidsai-ci-wheel.Dockerfile b/buildlib/dockers/rapidsai-ci-wheel.Dockerfile new file mode 100644 index 00000000000..52a82dada8a --- /dev/null +++ b/buildlib/dockers/rapidsai-ci-wheel.Dockerfile @@ -0,0 +1,7 @@ +# Azure Pipelines wrapper around rapidsai/ci-wheel. +# Opens /pyenv so the Azure-injected step user can write shims. 
+ +ARG BASE_IMAGE=rapidsai/ci-wheel:26.06-latest +FROM ${BASE_IMAGE} + +RUN chmod -R o+rwX /pyenv diff --git a/buildlib/pr/main.yml b/buildlib/pr/main.yml index ac764c6187c..f7b06fdd600 100644 --- a/buildlib/pr/main.yml +++ b/buildlib/pr/main.yml @@ -259,6 +259,10 @@ resources: # Exclusive_Process compute mode and need MPS for shared access). image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1 options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) -v /tmp/nvidia-mps:/tmp/nvidia-mps + - container: ucxx_rapidsai_ci_wheel + # Thin wrapper of rapidsai/ci-wheel; see buildlib/dockers/rapidsai-ci-wheel.Dockerfile. + image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-wheel:26.06-azp-1 + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) stages: - stage: Codestyle @@ -350,6 +354,112 @@ stages: parameters: dependsOn: [] # temp: skip Static_check to test stage faster on this PR; restore [Static_check] before merge + - stage: UCXX_build + dependsOn: [] # temp: skip Static_check while iterating Phase 2; restore [Static_check] before merge + jobs: + # 2.1: conda-cpp-build. Mirrors upstream conda-cpp-build matrix (4 legs). + - template: ucxx_conda_cpp_build.yml + parameters: + container: ucxx_rapidsai_ci_conda + slices: + - name: x86_64_cuda12_py311 + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: x86_64_cuda13_py311 + demands: ucx_docker -equals yes + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + - name: aarch64_cuda12_py311 + demands: ucx_arm64 + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: aarch64_cuda13_py311 + demands: ucx_arm64 + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + # 2.2: conda-python-build. Depends on 2.1's artifacts (per-slice). 
+ - template: ucxx_conda_python_build.yml + parameters: + container: ucxx_rapidsai_ci_conda + slices: + - name: x86_64_cuda12_py311 + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: x86_64_cuda13_py311 + demands: ucx_docker -equals yes + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + - name: aarch64_cuda12_py311 + demands: ucx_arm64 + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: aarch64_cuda13_py311 + demands: ucx_arm64 + rapids_cuda_version: '13.2.0' + rapids_py_version: '3.11' + # 2.3: wheel-build-libucxx. Upstream filter = max_by(PY_VER) per (ARCH, CUDA major). + - template: ucxx_wheel_libucxx_build.yml + parameters: + container: ucxx_rapidsai_ci_wheel + slices: + - name: x86_64_cuda12_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.14' + - name: x86_64_cuda13_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.14' + - name: aarch64_cuda12_py314 + demands: ucx_arm64 + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.14' + - name: aarch64_cuda13_py314 + demands: ucx_arm64 + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.14' + # 2.4: wheel-build-ucxx. Upstream filter = min_by(PY_VER) per (CUDA, ARCH). + # Each ucxx wheel slice consumes a libucxx wheel slice; pair by (ARCH, CUDA). 
+ - template: ucxx_wheel_ucxx_build.yml + parameters: + container: ucxx_rapidsai_ci_wheel + slices: + - name: x86_64_cuda12_py311 + libucxx_slice: x86_64_cuda12_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: x86_64_cuda13_py311 + libucxx_slice: x86_64_cuda13_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.11' + - name: aarch64_cuda12_py311 + libucxx_slice: aarch64_cuda12_py314 + demands: ucx_arm64 + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.11' + - name: aarch64_cuda13_py311 + libucxx_slice: aarch64_cuda13_py314 + demands: ucx_arm64 + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.11' + # 2.5: wheel-build-distributed-ucxx. Pure-python; upstream emits 2 amd64 legs. + - template: ucxx_wheel_distributed_ucxx_build.yml + parameters: + container: ucxx_rapidsai_ci_wheel + slices: + - name: x86_64_cuda12_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '12.9.1' + rapids_py_version: '3.14' + - name: x86_64_cuda13_py314 + demands: ucx_docker -equals yes + rapids_cuda_version: '13.0.2' + rapids_py_version: '3.14' + - stage: Tests condition: false # temp: skip non-UCXX stage; restore before merge dependsOn: [Basic_compile] diff --git a/buildlib/pr/ucxx_conda_cpp_build.yml b/buildlib/pr/ucxx_conda_cpp_build.yml new file mode 100644 index 00000000000..03674583bc2 --- /dev/null +++ b/buildlib/pr/ucxx_conda_cpp_build.yml @@ -0,0 +1,40 @@ +parameters: + container: ucxx_rapidsai_ci_conda + slices: [] + +jobs: + - ${{ each slice in parameters.slices }}: + - job: ucxx_conda_cpp_build_${{ slice.name }} + workspace: + clean: all + pool: + name: MLNX + demands: ${{ slice.demands }} + displayName: 'UCXX conda-cpp-build (${{ slice.name }})' + container: ${{ parameters.container }} + timeoutInMinutes: 60 + variables: + UCX_DIR: $(Agent.BuildDirectory)/ucx + UCXX_DIR: $(Agent.BuildDirectory)/ucxx + RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version 
}} + RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }} + RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory) + + steps: + - checkout: self + path: ucx + fetchDepth: 100 + retryCountOnTaskFailure: 5 + + - checkout: ucxx + path: ucxx + retryCountOnTaskFailure: 5 + + - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh conda_cpp + displayName: Build UCXX conda C++ package + + - task: PublishBuildArtifacts@1 + displayName: Publish conda-cpp artifact + inputs: + pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR) + artifactName: ucxx-conda-cpp-${{ slice.name }} diff --git a/buildlib/pr/ucxx_conda_python_build.yml b/buildlib/pr/ucxx_conda_python_build.yml new file mode 100644 index 00000000000..da5565be1e3 --- /dev/null +++ b/buildlib/pr/ucxx_conda_python_build.yml @@ -0,0 +1,53 @@ +parameters: + container: ucxx_rapidsai_ci_conda + slices: [] + +jobs: + - ${{ each slice in parameters.slices }}: + - job: ucxx_conda_python_build_${{ slice.name }} + workspace: + clean: all + pool: + name: MLNX + demands: ${{ slice.demands }} + displayName: 'UCXX conda-python-build (${{ slice.name }})' + container: ${{ parameters.container }} + timeoutInMinutes: 60 + dependsOn: ucxx_conda_cpp_build_${{ slice.name }} + variables: + UCX_DIR: $(Agent.BuildDirectory)/ucx + UCXX_DIR: $(Agent.BuildDirectory)/ucxx + RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }} + RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }} + RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory) + + steps: + - checkout: self + path: ucx + fetchDepth: 100 + retryCountOnTaskFailure: 5 + + - checkout: ucxx + path: ucxx + retryCountOnTaskFailure: 5 + + - task: DownloadBuildArtifacts@1 + displayName: Fetch conda-cpp artifact + inputs: + buildType: current + artifactName: ucxx-conda-cpp-${{ slice.name }} + downloadPath: $(System.DefaultWorkingDirectory)/_dl + + - bash: | + rm -rf "$(RAPIDS_BLD_OUTPUT_DIR)" + mv "$(System.DefaultWorkingDirectory)/_dl/ucxx-conda-cpp-${{ slice.name }}" "$(RAPIDS_BLD_OUTPUT_DIR)" + displayName: 
Stage conda-cpp artifact + + - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh conda_python + displayName: Build UCXX conda Python package + + - task: PublishBuildArtifacts@1 + displayName: Publish conda-python artifact + inputs: + pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR) + artifactName: ucxx-conda-python-${{ slice.name }} diff --git a/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml b/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml new file mode 100644 index 00000000000..cbbafceef12 --- /dev/null +++ b/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml @@ -0,0 +1,40 @@ +parameters: + container: ucxx_rapidsai_ci_wheel + slices: [] + +jobs: + - ${{ each slice in parameters.slices }}: + - job: ucxx_wheel_dist_ucxx_build_${{ slice.name }} + workspace: + clean: all + pool: + name: MLNX + demands: ${{ slice.demands }} + displayName: 'UCXX wheel-build-distributed-ucxx (${{ slice.name }})' + container: ${{ parameters.container }} + timeoutInMinutes: 45 + variables: + UCX_DIR: $(Agent.BuildDirectory)/ucx + UCXX_DIR: $(Agent.BuildDirectory)/ucxx + RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }} + RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }} + RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory) + + steps: + - checkout: self + path: ucx + fetchDepth: 100 + retryCountOnTaskFailure: 5 + + - checkout: ucxx + path: ucxx + retryCountOnTaskFailure: 5 + + - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_distributed_ucxx + displayName: Build distributed-ucxx wheel + + - task: PublishBuildArtifacts@1 + displayName: Publish distributed-ucxx wheel artifact + inputs: + pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR) + artifactName: ucxx-wheel-distributed-ucxx-${{ slice.name }} diff --git a/buildlib/pr/ucxx_wheel_libucxx_build.yml b/buildlib/pr/ucxx_wheel_libucxx_build.yml new file mode 100644 index 00000000000..28525d844aa --- /dev/null +++ b/buildlib/pr/ucxx_wheel_libucxx_build.yml @@ -0,0 +1,40 @@ +parameters: + container: ucxx_rapidsai_ci_wheel + slices: [] + 
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_wheel_libucxx_build_${{ slice.name }}
+      displayName: 'UCXX wheel-build-libucxx (${{ slice.name }})'
+      timeoutInMinutes: 60
+      container: ${{ parameters.container }}
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      workspace:
+        clean: all
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_libucxx
+          displayName: Build libucxx wheel
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish libucxx wheel artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-wheel-libucxx-${{ slice.name }}
diff --git a/buildlib/pr/ucxx_wheel_ucxx_build.yml b/buildlib/pr/ucxx_wheel_ucxx_build.yml
new file mode 100644
index 00000000000..35d0f3150ac
--- /dev/null
+++ b/buildlib/pr/ucxx_wheel_ucxx_build.yml
@@ -0,0 +1,58 @@
+# Template: one UCXX wheel-build-ucxx job per entry in `slices`.
+# Each job waits for the libucxx wheel job named by `slice.libucxx_slice`,
+# stages that wheel under WHEEL_INPUT_DIR and runs build_ucxx.sh wheel_ucxx.
+parameters:
+  container: ucxx_rapidsai_ci_wheel
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_wheel_ucxx_build_${{ slice.name }}
+      displayName: 'UCXX wheel-build-ucxx (${{ slice.name }})'
+      dependsOn: ucxx_wheel_libucxx_build_${{ slice.libucxx_slice }}
+      timeoutInMinutes: 60
+      container: ${{ parameters.container }}
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      workspace:
+        clean: all
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+        # Where the downloaded libucxx wheel artifact is staged.
+        WHEEL_INPUT_DIR: $(Build.ArtifactStagingDirectory)/wheel-libucxx
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - task: DownloadBuildArtifacts@1
+          displayName: Fetch libucxx wheel artifact
+          inputs:
+            buildType: current
+            artifactName: ucxx-wheel-libucxx-${{ slice.libucxx_slice }}
+            downloadPath: $(System.DefaultWorkingDirectory)/_dl
+
+        - bash: |
+            rm -rf "$(WHEEL_INPUT_DIR)"
+            mv "$(System.DefaultWorkingDirectory)/_dl/ucxx-wheel-libucxx-${{ slice.libucxx_slice }}" "$(WHEEL_INPUT_DIR)"
+          displayName: Stage libucxx wheel artifact
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_ucxx
+          displayName: Build ucxx wheel
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish ucxx wheel artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-wheel-ucxx-${{ slice.name }}
diff --git a/buildlib/tools/build_ucxx.sh b/buildlib/tools/build_ucxx.sh
new file mode 100755
index 00000000000..193c6f8fdcd
--- /dev/null
+++ b/buildlib/tools/build_ucxx.sh
@@ -0,0 +1,87 @@
+#!/bin/bash -eE
+#
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# See file LICENSE for terms.
+#
+# UCXX_build stage runner.
+# Usage: build_ucxx.sh <phase>
+#   <phase>: conda_cpp | conda_python | wheel_libucxx | wheel_ucxx |
+#            wheel_distributed_ucxx
+# Env: RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION, UCXX_DIR, RAPIDS_BLD_OUTPUT_DIR
+#      (all required); WHEEL_INPUT_DIR (optional, libucxx wheel location)
+
+# The pipeline starts this script as `bash build_ucxx.sh ...`, which ignores
+# the options on the shebang line, so turn them on explicitly.
+set -eE
+
+phase=${1:?phase required}
+: "${RAPIDS_CUDA_VERSION:?RAPIDS_CUDA_VERSION required}"
+: "${RAPIDS_PY_VERSION:?RAPIDS_PY_VERSION required}"
+: "${UCXX_DIR:?UCXX_DIR required}"
+: "${RAPIDS_BLD_OUTPUT_DIR:?RAPIDS_BLD_OUTPUT_DIR required}"
+
+export RAPIDS_CUDA_VERSION RAPIDS_PY_VERSION
+mkdir -p "$RAPIDS_BLD_OUTPUT_DIR"
+
+# Upstream scripts read phase-specific output-dir env names.
+case "$phase" in
+  conda_*) export RAPIDS_CONDA_BLD_OUTPUT_DIR="$RAPIDS_BLD_OUTPUT_DIR" ;;
+  wheel_*) export RAPIDS_WHEEL_BLD_OUTPUT_DIR="$RAPIDS_BLD_OUTPUT_DIR" ;;
+esac
+
+# Shim RAPIDS gha-tools downloaders to local build output dir.
+shim_dir="$HOME/.local/bin"
+mkdir -p "$shim_dir"
+for shim in rapids-download-conda-from-github rapids-download-from-github; do
+  printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_BLD_OUTPUT_DIR" > "$shim_dir/$shim"
+  chmod +x "$shim_dir/$shim"
+done
+
+# sccache set up by rapids-configure-sccache crashes on CMake TryCompile in this
+# image; shadow it with a no-op that still exports the env vars rattler expects.
+cat > "$shim_dir/rapids-configure-sccache" <<'EOF'
+#!/bin/bash
+export CMAKE_C_COMPILER_LAUNCHER=
+export CMAKE_CXX_COMPILER_LAUNCHER=
+export CMAKE_CUDA_COMPILER_LAUNCHER=
+export RUSTC_WRAPPER=
+EOF
+chmod +x "$shim_dir/rapids-configure-sccache"
+
+# wheel_ucxx consumes a libucxx wheel artifact; point the wheel downloader at it.
+if [[ -n "${WHEEL_INPUT_DIR:-}" ]]; then
+  printf '#!/bin/bash\necho "%s"\n' "$WHEEL_INPUT_DIR" > "$shim_dir/rapids-download-wheels-from-github"
+  chmod +x "$shim_dir/rapids-download-wheels-from-github"
+fi
+
+export PATH="$shim_dir:$PATH"
+
+cd "$UCXX_DIR"
+
+# Insert the include that the upstream header lacks, unless already present.
+# NOTE(review): the header name after "include" looks lost in this copy.
+header=python/ucxx/ucxx/examples/python_future_task.h
+grep -q "include " "$header" || sed -i '/^#pragma once/a #include ' "$header"
+
+echo "=== id: $(id), arch: $(uname -m), phase: $phase ==="
+
+# The wheel image (Rocky 8) defaults to gcc 8.5; UCXX C++ needs gcc-toolset-14
+# (designated initializers in assignment, matches upstream wheel CI).
+case "$phase" in
+  wheel_*)
+    gcc_bin=/opt/rh/gcc-toolset-14/root/usr/bin
+    if [[ -d "$gcc_bin" ]]; then
+      export PATH="$gcc_bin:$PATH" CC="$gcc_bin/gcc" CXX="$gcc_bin/g++"
+    fi
+    ;;
+esac
+
+case "$phase" in
+  conda_cpp) bash ci/build_cpp.sh ;;
+  conda_python) bash ci/build_python.sh ;;
+  wheel_libucxx) bash ci/build_wheel_libucxx.sh ;;
+  wheel_ucxx) bash ci/build_wheel_ucxx.sh ;;
+  wheel_distributed_ucxx) bash ci/build_wheel_distributed_ucxx.sh ;;
+  *) echo "Unknown phase: $phase" >&2; exit 1 ;;
+esac