diff --git a/buildlib/azure-pipelines-pr.yml b/buildlib/azure-pipelines-pr.yml
index d446fe72ea7..44ccf60fa5f 100644
--- a/buildlib/azure-pipelines-pr.yml
+++ b/buildlib/azure-pipelines-pr.yml
@@ -31,5 +31,13 @@ pr:
       - buildlib/tools/perf_results.py
       - buildlib/tools/perf-common.yml
 
+resources:
+  repositories:
+    - repository: ucxx
+      type: github
+      name: rapidsai/ucxx
+      endpoint: Mellanox-lab
+      ref: refs/heads/main
+
 extends:
   template: pr/main.yml
diff --git a/buildlib/dockers/rapidsai-ci-conda.Dockerfile b/buildlib/dockers/rapidsai-ci-conda.Dockerfile
new file mode 100644
index 00000000000..3e114744552
--- /dev/null
+++ b/buildlib/dockers/rapidsai-ci-conda.Dockerfile
@@ -0,0 +1,7 @@
+# Azure Pipelines wrapper around rapidsai/ci-conda.
+# Opens /opt/conda so the Azure-injected step user can use conda/python.
+
+ARG BASE_IMAGE=rapidsai/ci-conda:26.06-latest
+FROM ${BASE_IMAGE}
+
+RUN chmod -R o+rwX /opt/conda
diff --git a/buildlib/dockers/rapidsai-ci-wheel.Dockerfile b/buildlib/dockers/rapidsai-ci-wheel.Dockerfile
new file mode 100644
index 00000000000..52a82dada8a
--- /dev/null
+++ b/buildlib/dockers/rapidsai-ci-wheel.Dockerfile
@@ -0,0 +1,7 @@
+# Azure Pipelines wrapper around rapidsai/ci-wheel.
+# Opens /pyenv so the Azure-injected step user can write shims.
+
+ARG BASE_IMAGE=rapidsai/ci-wheel:26.06-latest
+FROM ${BASE_IMAGE}
+
+RUN chmod -R o+rwX /pyenv
diff --git a/buildlib/pr/main.yml b/buildlib/pr/main.yml
index a47f61052b9..f7b06fdd600 100644
--- a/buildlib/pr/main.yml
+++ b/buildlib/pr/main.yml
@@ -249,6 +249,20 @@ resources:
     - container: centos10stream
       image: rdmz-harbor.rdmz.labs.mlnx/hpcx/x86_64/centos10stream/builder:inbox
       options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES)
+    - container: ucxx_rapidsai_ci_conda
+      # Thin wrapper of rapidsai/ci-conda; see buildlib/dockers/rapidsai-ci-conda.Dockerfile.
+      image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1
+      options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES)
+    - container: ucxx_rapidsai_ci_conda_gpu
+      # Same image as above, +GPU. Bind-mount host MPS dirs so the container's
+      # CUDA client can reach the host MPS daemon (host GPUs are in
+      # Exclusive_Process compute mode and need MPS for shared access).
+      image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-conda:26.06-azp-1
+      options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) -v /tmp/nvidia-mps:/tmp/nvidia-mps
+    - container: ucxx_rapidsai_ci_wheel
+      # Thin wrapper of rapidsai/ci-wheel; see buildlib/dockers/rapidsai-ci-wheel.Dockerfile.
+      image: rdmz-harbor.rdmz.labs.mlnx/ucx/rapidsai-ci-wheel:26.06-azp-1
+      options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES)
 
 stages:
   - stage: Codestyle
@@ -256,16 +270,19 @@ stages:
       - template: codestyle.yml
 
   - stage: Basic_compile
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: []
     jobs:
       - template: basic_compile.yml
 
   - stage: Static_check
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - template: static_checks.yml
 
   - stage: Build
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - template: build_job.yml
@@ -283,6 +300,7 @@ stages:
           displayName: Build on aarch64
 
   - stage: ucx_perftest_mad_rte
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     displayName: ucx_perftest over MAD RTE
     lockBehavior: sequential
@@ -292,6 +310,7 @@ stages:
       - template: mad_tests.yml
 
   - stage: WireCompat
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - template: wire_compat.yml
@@ -323,6 +342,7 @@ stages:
   #        demands: ucx_iodemo -equals yes
 
   - stage: Coverity
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Static_check]
     jobs:
       - template: coverity.yml
@@ -330,7 +350,118 @@ stages:
           demands: ucx_docker -equals yes
           container: coverity_rh7
 
+  - template: ucxx_tests_stage.yml
+    parameters:
+      dependsOn: []  # temp: skip Static_check to test stage faster on this PR; restore [Static_check] before merge
+
+  - stage: UCXX_build
+    dependsOn: []  # temp: skip Static_check while iterating Phase 2; restore [Static_check] before merge
+    jobs:
+      # 2.1: conda-cpp-build. Mirrors upstream conda-cpp-build matrix (4 legs).
+      - template: ucxx_conda_cpp_build.yml
+        parameters:
+          container: ucxx_rapidsai_ci_conda
+          slices:
+            - name: x86_64_cuda12_py311
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: x86_64_cuda13_py311
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '13.2.0'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda12_py311
+              demands: ucx_arm64
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda13_py311
+              demands: ucx_arm64
+              rapids_cuda_version: '13.2.0'
+              rapids_py_version: '3.11'
+      # 2.2: conda-python-build. Depends on 2.1's artifacts (per-slice).
+      - template: ucxx_conda_python_build.yml
+        parameters:
+          container: ucxx_rapidsai_ci_conda
+          slices:
+            - name: x86_64_cuda12_py311
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: x86_64_cuda13_py311
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '13.2.0'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda12_py311
+              demands: ucx_arm64
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda13_py311
+              demands: ucx_arm64
+              rapids_cuda_version: '13.2.0'
+              rapids_py_version: '3.11'
+      # 2.3: wheel-build-libucxx. Upstream filter = max_by(PY_VER) per (ARCH, CUDA major).
+      - template: ucxx_wheel_libucxx_build.yml
+        parameters:
+          container: ucxx_rapidsai_ci_wheel
+          slices:
+            - name: x86_64_cuda12_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.14'
+            - name: x86_64_cuda13_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '13.0.2'
+              rapids_py_version: '3.14'
+            - name: aarch64_cuda12_py314
+              demands: ucx_arm64
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.14'
+            - name: aarch64_cuda13_py314
+              demands: ucx_arm64
+              rapids_cuda_version: '13.0.2'
+              rapids_py_version: '3.14'
+      # 2.4: wheel-build-ucxx. Upstream filter = min_by(PY_VER) per (CUDA, ARCH).
+      # Each ucxx wheel slice consumes a libucxx wheel slice; pair by (ARCH, CUDA).
+      - template: ucxx_wheel_ucxx_build.yml
+        parameters:
+          container: ucxx_rapidsai_ci_wheel
+          slices:
+            - name: x86_64_cuda12_py311
+              libucxx_slice: x86_64_cuda12_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: x86_64_cuda13_py311
+              libucxx_slice: x86_64_cuda13_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '13.0.2'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda12_py311
+              libucxx_slice: aarch64_cuda12_py314
+              demands: ucx_arm64
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.11'
+            - name: aarch64_cuda13_py311
+              libucxx_slice: aarch64_cuda13_py314
+              demands: ucx_arm64
+              rapids_cuda_version: '13.0.2'
+              rapids_py_version: '3.11'
+      # 2.5: wheel-build-distributed-ucxx. Pure-python; upstream emits 2 amd64 legs.
+      - template: ucxx_wheel_distributed_ucxx_build.yml
+        parameters:
+          container: ucxx_rapidsai_ci_wheel
+          slices:
+            - name: x86_64_cuda12_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '12.9.1'
+              rapids_py_version: '3.14'
+            - name: x86_64_cuda13_py314
+              demands: ucx_docker -equals yes
+              rapids_cuda_version: '13.0.2'
+              rapids_py_version: '3.14'
+
   - stage: Tests
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - template: tests.yml
@@ -410,6 +541,7 @@ stages:
           demands: ucx_gpu -equals yes
 
   - stage: Build_Static
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - job: build_source
@@ -456,12 +588,14 @@ stages:
 
 
   - stage: Cuda
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Basic_compile]
     jobs:
       - template: cuda/cuda.yml
 
 
   - stage: AddressSanitizer
+    condition: false  # temp: skip non-UCXX stage; restore before merge
     dependsOn: [Static_check]
     jobs:
       - template: tests.yml
diff --git a/buildlib/pr/ucxx_conda_cpp_build.yml b/buildlib/pr/ucxx_conda_cpp_build.yml
new file mode 100644
index 00000000000..03674583bc2
--- /dev/null
+++ b/buildlib/pr/ucxx_conda_cpp_build.yml
@@ -0,0 +1,40 @@
+parameters:
+  container: ucxx_rapidsai_ci_conda
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_conda_cpp_build_${{ slice.name }}
+      workspace:
+        clean: all
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      displayName: 'UCXX conda-cpp-build (${{ slice.name }})'
+      container: ${{ parameters.container }}
+      timeoutInMinutes: 60
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh conda_cpp
+          displayName: Build UCXX conda C++ package
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish conda-cpp artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-conda-cpp-${{ slice.name }}
diff --git a/buildlib/pr/ucxx_conda_python_build.yml b/buildlib/pr/ucxx_conda_python_build.yml
new file mode 100644
index 00000000000..da5565be1e3
--- /dev/null
+++ b/buildlib/pr/ucxx_conda_python_build.yml
@@ -0,0 +1,53 @@
+parameters:
+  container: ucxx_rapidsai_ci_conda
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_conda_python_build_${{ slice.name }}
+      workspace:
+        clean: all
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      displayName: 'UCXX conda-python-build (${{ slice.name }})'
+      container: ${{ parameters.container }}
+      timeoutInMinutes: 60
+      dependsOn: ucxx_conda_cpp_build_${{ slice.name }}
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - task: DownloadBuildArtifacts@1
+          displayName: Fetch conda-cpp artifact
+          inputs:
+            buildType: current
+            artifactName: ucxx-conda-cpp-${{ slice.name }}
+            downloadPath: $(System.DefaultWorkingDirectory)/_dl
+
+        - bash: |
+            rm -rf "$(RAPIDS_BLD_OUTPUT_DIR)"
+            mv "$(System.DefaultWorkingDirectory)/_dl/ucxx-conda-cpp-${{ slice.name }}" "$(RAPIDS_BLD_OUTPUT_DIR)"
+          displayName: Stage conda-cpp artifact
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh conda_python
+          displayName: Build UCXX conda Python package
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish conda-python artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-conda-python-${{ slice.name }}
diff --git a/buildlib/pr/ucxx_tests_stage.yml b/buildlib/pr/ucxx_tests_stage.yml
new file mode 100644
index 00000000000..850f5c53015
--- /dev/null
+++ b/buildlib/pr/ucxx_tests_stage.yml
@@ -0,0 +1,85 @@
+parameters:
+  dependsOn: [Static_check]
+  cpu_container: ucxx_rapidsai_ci_conda
+  gpu_container: ucxx_rapidsai_ci_conda_gpu
+  slices:
+    # CPU coverage: cuda x arch x py grid; builds conda C++/Python pkgs and
+    # runs UCXX gtests on host transports (CUDA-free subset).
+    - name: x86_64_cuda12_py311
+      gpu: false
+      demands: ucx_docker -equals yes
+      rapids_cuda_version: '12.9.1'
+      rapids_py_version: '3.11'
+    - name: x86_64_cuda13_py311
+      gpu: false
+      demands: ucx_docker -equals yes
+      rapids_cuda_version: '13.2.0'
+      rapids_py_version: '3.11'
+    - name: aarch64_cuda12_py311
+      gpu: false
+      demands: ucx_arm64
+      rapids_cuda_version: '12.9.1'
+      rapids_py_version: '3.11'
+    - name: aarch64_cuda13_py311
+      gpu: false
+      demands: ucx_arm64
+      rapids_cuda_version: '13.2.0'
+      rapids_py_version: '3.11'
+    # GPU coverage: x86_64 only (MLNX ucx_gpu_test pool has no arm64 hosts);
+    # py 3.12 + 3.13 to exercise both supported toolchains.
+    - name: amd64_cuda1302_py312
+      gpu: true
+      demands: ucx_gpu_test -equals yes
+      rapids_cuda_version: '13.0.2'
+      rapids_py_version: '3.12'
+    - name: amd64_cuda1322_py313
+      gpu: true
+      demands: ucx_gpu_test -equals yes
+      rapids_cuda_version: '13.2.0'
+      rapids_py_version: '3.13'
+
+stages:
+  - stage: UCXX_tests
+    dependsOn: ${{ parameters.dependsOn }}
+    jobs:
+      - ${{ each slice in parameters.slices }}:
+        - job: ucxx_tests_${{ slice.name }}
+          workspace:
+            clean: all
+          pool:
+            name: MLNX
+            demands: ${{ slice.demands }}
+          ${{ if eq(slice.gpu, true) }}:
+            displayName: 'UCXX GPU tests (${{ slice.name }})'
+            container: ${{ parameters.gpu_container }}
+            timeoutInMinutes: 120
+          ${{ if eq(slice.gpu, false) }}:
+            displayName: 'UCXX tests (${{ slice.name }})'
+            container: ${{ parameters.cpu_container }}
+            timeoutInMinutes: 90
+          variables:
+            IS_GPU: ${{ slice.gpu }}
+            RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+            RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+            UCX_DIR: $(Agent.BuildDirectory)/ucx
+            UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+
+          steps:
+            - checkout: self
+              path: ucx
+              fetchDepth: 100
+              retryCountOnTaskFailure: 5
+
+            - checkout: ucxx
+              path: ucxx
+              retryCountOnTaskFailure: 5
+
+            - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh build
+              displayName: Build UCXX
+
+            - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh test_cpp
+              displayName: Run UCXX C++ tests
+
+            - ${{ if eq(slice.gpu, true) }}:
+              - bash: bash $(UCX_DIR)/buildlib/tools/test_ucxx.sh test_python
+                displayName: Run UCXX Python tests
diff --git a/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml b/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml
new file mode 100644
index 00000000000..cbbafceef12
--- /dev/null
+++ b/buildlib/pr/ucxx_wheel_distributed_ucxx_build.yml
@@ -0,0 +1,40 @@
+parameters:
+  container: ucxx_rapidsai_ci_wheel
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_wheel_dist_ucxx_build_${{ slice.name }}
+      workspace:
+        clean: all
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      displayName: 'UCXX wheel-build-distributed-ucxx (${{ slice.name }})'
+      container: ${{ parameters.container }}
+      timeoutInMinutes: 45
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_distributed_ucxx
+          displayName: Build distributed-ucxx wheel
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish distributed-ucxx wheel artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-wheel-distributed-ucxx-${{ slice.name }}
diff --git a/buildlib/pr/ucxx_wheel_libucxx_build.yml b/buildlib/pr/ucxx_wheel_libucxx_build.yml
new file mode 100644
index 00000000000..28525d844aa
--- /dev/null
+++ b/buildlib/pr/ucxx_wheel_libucxx_build.yml
@@ -0,0 +1,40 @@
+parameters:
+  container: ucxx_rapidsai_ci_wheel
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_wheel_libucxx_build_${{ slice.name }}
+      workspace:
+        clean: all
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      displayName: 'UCXX wheel-build-libucxx (${{ slice.name }})'
+      container: ${{ parameters.container }}
+      timeoutInMinutes: 60
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_libucxx
+          displayName: Build libucxx wheel
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish libucxx wheel artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-wheel-libucxx-${{ slice.name }}
diff --git a/buildlib/pr/ucxx_wheel_ucxx_build.yml b/buildlib/pr/ucxx_wheel_ucxx_build.yml
new file mode 100644
index 00000000000..35d0f3150ac
--- /dev/null
+++ b/buildlib/pr/ucxx_wheel_ucxx_build.yml
@@ -0,0 +1,56 @@
+parameters:
+  container: ucxx_rapidsai_ci_wheel
+  slices: []
+
+jobs:
+  - ${{ each slice in parameters.slices }}:
+    - job: ucxx_wheel_ucxx_build_${{ slice.name }}
+      workspace:
+        clean: all
+      pool:
+        name: MLNX
+        demands: ${{ slice.demands }}
+      displayName: 'UCXX wheel-build-ucxx (${{ slice.name }})'
+      container: ${{ parameters.container }}
+      timeoutInMinutes: 60
+      dependsOn: ucxx_wheel_libucxx_build_${{ slice.libucxx_slice }}
+      variables:
+        UCX_DIR: $(Agent.BuildDirectory)/ucx
+        UCXX_DIR: $(Agent.BuildDirectory)/ucxx
+        RAPIDS_CUDA_VERSION: ${{ slice.rapids_cuda_version }}
+        RAPIDS_PY_VERSION: ${{ slice.rapids_py_version }}
+        RAPIDS_BLD_OUTPUT_DIR: $(Build.ArtifactStagingDirectory)
+        # Keep the consumed libucxx wheel outside RAPIDS_BLD_OUTPUT_DIR so the
+        # publish step below does not re-upload it inside the ucxx artifact.
+        WHEEL_INPUT_DIR: $(Agent.BuildDirectory)/wheel-libucxx
+
+      steps:
+        - checkout: self
+          path: ucx
+          fetchDepth: 100
+          retryCountOnTaskFailure: 5
+
+        - checkout: ucxx
+          path: ucxx
+          retryCountOnTaskFailure: 5
+
+        - task: DownloadBuildArtifacts@1
+          displayName: Fetch libucxx wheel artifact
+          inputs:
+            buildType: current
+            artifactName: ucxx-wheel-libucxx-${{ slice.libucxx_slice }}
+            downloadPath: $(System.DefaultWorkingDirectory)/_dl
+
+        - bash: |
+            rm -rf "$(WHEEL_INPUT_DIR)"
+            mv "$(System.DefaultWorkingDirectory)/_dl/ucxx-wheel-libucxx-${{ slice.libucxx_slice }}" "$(WHEEL_INPUT_DIR)"
+          displayName: Stage libucxx wheel artifact
+
+        - bash: bash $(UCX_DIR)/buildlib/tools/build_ucxx.sh wheel_ucxx
+          displayName: Build ucxx wheel
+
+        - task: PublishBuildArtifacts@1
+          displayName: Publish ucxx wheel artifact
+          inputs:
+            pathToPublish: $(RAPIDS_BLD_OUTPUT_DIR)
+            artifactName: ucxx-wheel-ucxx-${{ slice.name }}
diff --git a/buildlib/tools/build_ucxx.sh b/buildlib/tools/build_ucxx.sh
new file mode 100755
index 00000000000..193c6f8fdcd
--- /dev/null
+++ b/buildlib/tools/build_ucxx.sh
@@ -0,0 +1,84 @@
+#!/bin/bash -eE
+#
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# See file LICENSE for terms.
+#
+# UCXX_build stage runner.
+# Usage: build_ucxx.sh PHASE
+# Env:   RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION, UCXX_DIR, RAPIDS_BLD_OUTPUT_DIR
+
+# The pipeline runs this file as `bash build_ucxx.sh ...`, which ignores the
+# shebang options; set them here so a failing command aborts the script.
+set -eE
+
+phase=${1:?phase required}
+: "${RAPIDS_CUDA_VERSION:?RAPIDS_CUDA_VERSION required}"
+: "${RAPIDS_PY_VERSION:?RAPIDS_PY_VERSION required}"
+: "${UCXX_DIR:?UCXX_DIR required}"
+: "${RAPIDS_BLD_OUTPUT_DIR:?RAPIDS_BLD_OUTPUT_DIR required}"
+
+export RAPIDS_CUDA_VERSION RAPIDS_PY_VERSION
+mkdir -p "$RAPIDS_BLD_OUTPUT_DIR"
+
+# Upstream scripts read phase-specific output-dir env names.
+case "$phase" in
+  conda_*) export RAPIDS_CONDA_BLD_OUTPUT_DIR="$RAPIDS_BLD_OUTPUT_DIR" ;;
+  wheel_*) export RAPIDS_WHEEL_BLD_OUTPUT_DIR="$RAPIDS_BLD_OUTPUT_DIR" ;;
+esac
+
+# Shim RAPIDS gha-tools downloaders to local build output dir.
+mkdir -p "$HOME/.local/bin"
+for tool in rapids-download-conda-from-github rapids-download-from-github; do
+  printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_BLD_OUTPUT_DIR" > "$HOME/.local/bin/$tool"
+  chmod +x "$HOME/.local/bin/$tool"
+done
+
+# sccache from rapids-configure-sccache crashes on CMake TryCompile in this
+# image; replace with a no-op that still exports the env vars rattler expects.
+cat > "$HOME/.local/bin/rapids-configure-sccache" <<'EOF'
+#!/bin/bash
+export CMAKE_C_COMPILER_LAUNCHER=
+export CMAKE_CXX_COMPILER_LAUNCHER=
+export CMAKE_CUDA_COMPILER_LAUNCHER=
+export RUSTC_WRAPPER=
+EOF
+chmod +x "$HOME/.local/bin/rapids-configure-sccache"
+
+# wheel_ucxx phase consumes a libucxx wheel artifact; shim downloader to it.
+if [ -n "${WHEEL_INPUT_DIR:-}" ]; then
+  printf '#!/bin/bash\necho "%s"\n' "$WHEEL_INPUT_DIR" > "$HOME/.local/bin/rapids-download-wheels-from-github"
+  chmod +x "$HOME/.local/bin/rapids-download-wheels-from-github"
+fi
+
+export PATH="$HOME/.local/bin:$PATH"
+
+cd "$UCXX_DIR"
+
+# Upstream header missing .
+hdr=python/ucxx/ucxx/examples/python_future_task.h
+grep -q "include " "$hdr" || sed -i '/^#pragma once/a #include ' "$hdr"
+
+echo "=== id: $(id), arch: $(uname -m), phase: $phase ==="
+
+# Wheel image (Rocky 8) defaults to gcc 8.5; UCXX C++ needs gcc-toolset-14
+# (designated initializers in assignment, matches upstream wheel CI).
+case "$phase" in
+  wheel_*)
+    toolset=/opt/rh/gcc-toolset-14/root/usr/bin
+    if [ -d "$toolset" ]; then
+      export PATH="$toolset:$PATH"
+      export CC="$toolset/gcc"
+      export CXX="$toolset/g++"
+    fi
+    ;;
+esac
+
+case "$phase" in
+  conda_cpp)              bash ci/build_cpp.sh ;;
+  conda_python)           bash ci/build_python.sh ;;
+  wheel_libucxx)          bash ci/build_wheel_libucxx.sh ;;
+  wheel_ucxx)             bash ci/build_wheel_ucxx.sh ;;
+  wheel_distributed_ucxx) bash ci/build_wheel_distributed_ucxx.sh ;;
+  *) echo "Unknown phase: $phase" >&2; exit 1 ;;
+esac
diff --git a/buildlib/tools/test_ucxx.sh b/buildlib/tools/test_ucxx.sh
new file mode 100755
index 00000000000..b4f0bf9588d
--- /dev/null
+++ b/buildlib/tools/test_ucxx.sh
@@ -0,0 +1,94 @@
+#!/bin/bash -eE
+#
+# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# See file LICENSE for terms.
+#
+# UCXX_tests stage runner.
+# Usage: test_ucxx.sh build|test_cpp|test_python
+# Env:   IS_GPU, RAPIDS_CUDA_VERSION, RAPIDS_PY_VERSION, UCXX_DIR
+
+# The pipeline runs this file as `bash test_ucxx.sh ...`, which ignores the
+# shebang options; set them here so a failing command aborts the phase.
+set -eE
+
+phase=${1:?phase required: build | test_cpp | test_python}
+: "${IS_GPU:?IS_GPU required}"
+: "${RAPIDS_CUDA_VERSION:?RAPIDS_CUDA_VERSION required}"
+: "${RAPIDS_PY_VERSION:?RAPIDS_PY_VERSION required}"
+: "${UCXX_DIR:?UCXX_DIR required}"
+
+export RAPIDS_CUDA_VERSION RAPIDS_PY_VERSION
+export RAPIDS_CONDA_BLD_OUTPUT_DIR=/tmp/conda-bld-output
+mkdir -p "$RAPIDS_CONDA_BLD_OUTPUT_DIR"
+
+# Shim RAPIDS gha-tools downloaders to local conda-bld dir.
+mkdir -p "$HOME/.local/bin"
+for tool in rapids-download-conda-from-github rapids-download-from-github; do
+  printf '#!/bin/bash\necho "%s"\n' "$RAPIDS_CONDA_BLD_OUTPUT_DIR" > "$HOME/.local/bin/$tool"
+  chmod +x "$HOME/.local/bin/$tool"
+done
+export PATH="$HOME/.local/bin:$PATH"
+
+cd "$UCXX_DIR"
+
+case "$phase" in
+  build)
+    echo "=== id: $(id), arch: $(uname -m), gpu: $IS_GPU ==="
+    which python && python --version
+    [ "$IS_GPU" = "true" ] && nvidia-smi || true
+
+    if [ "$IS_GPU" = "true" ]; then
+      # sccache from rapids-configure-sccache crashes on CMake TryCompile in
+      # this image; replace with a no-op that still exports the env vars the
+      # rattler recipe expects.
+      cat > "$HOME/.local/bin/rapids-configure-sccache" <<'EOF'
+#!/bin/bash
+export CMAKE_C_COMPILER_LAUNCHER=
+export CMAKE_CXX_COMPILER_LAUNCHER=
+export CMAKE_CUDA_COMPILER_LAUNCHER=
+export RUSTC_WRAPPER=
+EOF
+      chmod +x "$HOME/.local/bin/rapids-configure-sccache"
+
+      # Upstream header missing .
+      hdr=python/ucxx/ucxx/examples/python_future_task.h
+      grep -q "include " "$hdr" || sed -i '/^#pragma once/a #include ' "$hdr"
+    else
+      # No GPU on CPU slice; soften nvidia-smi check.
+      # Match the bare call at any indentation and keep that indentation.
+      sed -i 's#^\([[:space:]]*\)nvidia-smi$#\1command -v nvidia-smi >/dev/null \&\& nvidia-smi || echo "(no GPU - CPU slice)"#' ci/test_common.sh
+    fi
+
+    bash ci/build_cpp.sh
+    bash ci/build_python.sh
+    ;;
+
+  test_cpp)
+    if [ "$IS_GPU" = "true" ]; then
+      # Run as Azure-injected step user (mapped to host swx-azure-svc uid),
+      # which matches the host MPS daemon owner so MPS accepts the client.
+      bash ci/test_cpp.sh
+    else
+      # CPU slice: no GPU driver loaded. Pin UCX onto host transports;
+      # filter CUDA-only gtest suites.
+      export CUDA_VISIBLE_DEVICES=
+      export UCX_TLS=tcp,sm,self
+      export GTEST_FILTER='-RMM*.*:CCCL*.*'
+      bash ci/test_cpp.sh
+    fi
+    ;;
+
+  test_python)
+    if [ "$IS_GPU" != "true" ]; then
+      echo "test_python only runs on GPU slice; skipping" >&2
+      exit 0
+    fi
+    bash ci/test_python.sh
+    ;;
+
+  *)
+    echo "Unknown phase: $phase (expected build | test_cpp | test_python)" >&2
+    exit 1
+    ;;
+esac