From acce468787d76c27ce72fe9cf05e759b5e12a210 Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Apr 2026 15:54:52 +0800 Subject: [PATCH 01/13] align torch::cuda::synchronize with PyTorch using CUDAGuard --- .../torch/csrc/api/include/torch/cuda.cpp | 16 +++++++++------- test/cpp/compat/ATen_CUDAContext_test.cc | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp index e13f017e35c88a..8abf88a1e1007c 100644 --- a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp +++ b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include #include @@ -38,14 +40,14 @@ void synchronize(int64_t device_index) { TORCH_CHECK(device_index < 0 || device_index < num_gpus, "Device index out of range: ", device_index); -// TODO(yongqiang) need using DeviceGuard #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - paddle::platform::SetDeviceId(device_index); -#ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize()); -#else - PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); -#endif + // Match PyTorch semantics: + // 1. `device_index == -1` means "current CUDA device". + // 2. Explicit device synchronization must not leak a changed current device + // to the caller after returning. + const c10::cuda::CUDAGuard device_guard(c10::Device( + c10::DeviceType::CUDA, static_cast(device_index))); + c10::cuda::device_synchronize(); #else PADDLE_THROW(common::errors::Unavailable( "Paddle is not compiled with CUDA. Cannot visit device synchronize.")); diff --git a/test/cpp/compat/ATen_CUDAContext_test.cc b/test/cpp/compat/ATen_CUDAContext_test.cc index f4c8a58dd7084b..24223733059b58 100644 --- a/test/cpp/compat/ATen_CUDAContext_test.cc +++ b/test/cpp/compat/ATen_CUDAContext_test.cc @@ -15,10 +15,12 @@ #include #include #include +#include #include "gtest/gtest.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include #include #include "paddle/phi/backends/gpu/gpu_info.h" #endif @@ -78,6 +80,23 @@ TEST(CUDAFunctionsTest, AtNamespaceAliases) { auto stream = c10::cuda::getCurrentCUDAStream(); ASSERT_NO_THROW(at::cuda::stream_synchronize(stream)); } + +TEST(CUDAFunctionsTest, TorchSynchronizePreservesCurrentDevice) { + if (!torch::cuda::is_available()) { + return; + } + if (torch::cuda::device_count() < 2) { + return; + } + + constexpr c10::DeviceIndex current_device = 0; + constexpr c10::DeviceIndex other_device = 1; + c10::cuda::CUDAGuard guard(current_device); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), current_device); + + ASSERT_NO_THROW(torch::cuda::synchronize(other_device)); + EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), current_device); +} #endif // --------------------------------------------------------------------------- From 0738877285f429c6b60f7d40d4ed5ae205b81e1a Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 18 Apr 2026 11:28:26 +0800 Subject: [PATCH 02/13] fix cpu build --- .../api/include/compat/torch/csrc/api/include/torch/cuda.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp index 8abf88a1e1007c..fe6fa4c4be1f47 100644 --- a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp +++ b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include +#endif #include #include From e863dc88ddcd94eb1b58bb763d38ae2760a5ddc9 Mon Sep 17 00:00:00 2001 From: youge325 Date: Mon, 20 Apr 2026 11:10:57 +0800 Subject: [PATCH 03/13] fix Linking Error on Windows --- .../include/compat/torch/csrc/api/include/torch/cuda.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.h b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.h index 4eb38ceecc681f..5d45d82a21dc77 100644 --- a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.h +++ b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.h @@ -15,16 +15,16 @@ #pragma once #include - #include +#include "paddle/common/macros.h" namespace torch::cuda { -c10::DeviceIndex device_count(); +PADDLE_API c10::DeviceIndex device_count(); -bool is_available(); +PADDLE_API bool is_available(); -void synchronize(int64_t device_index = -1); +PADDLE_API void synchronize(int64_t device_index = -1); } // namespace torch::cuda namespace at::cuda { From eaa7c9b500e6c29be7d5d02caf98b3ccf6a9e8c9 Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 18 Apr 2026 16:33:48 +0800 Subject: [PATCH 04/13] try to fix CUDAGuard --- .../api/include/compat/c10/cuda/CUDAGuard.h | 32 +++++++++++++++---- test/cpp/compat/ATen_CUDAContext_test.cc | 32 +++++++++++++++++-- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h index 130e2fe6b12727..9efc278dc0c2df 100644 --- a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h +++ b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h @@ -39,6 +39,20 @@ inline Device normalize_cuda_device(Device device) { : current_cuda_device(); } +inline void restore_cuda_device(const Device& original_device, + const Device& current_device) { + if (original_device.index() != current_device.index()) { + phi::backends::gpu::SetDeviceId(static_cast(original_device.index())); + } +} + +inline void restore_cuda_device(const std::optional& original_device, + const std::optional& current_device) { + if (original_device.has_value() && current_device.has_value()) { + restore_cuda_device(*original_device, *current_device); + } +} + } // namespace detail struct CUDAGuard { @@ -47,14 +61,14 @@ struct CUDAGuard { explicit CUDAGuard(DeviceIndex device_index) : original_device_(detail::current_cuda_device()), current_device_(original_device_), - guard_() { + guard_(std::in_place) { set_index(device_index); } explicit CUDAGuard(Device device) : original_device_(detail::current_cuda_device()), current_device_(original_device_), - guard_() { + guard_(std::in_place) { set_device(device); } @@ -63,18 +77,21 @@ struct CUDAGuard { CUDAGuard(CUDAGuard&& other) = delete; CUDAGuard& operator=(CUDAGuard&& other) = delete; - ~CUDAGuard() = default; + ~CUDAGuard() { + guard_.reset(); + detail::restore_cuda_device(original_device_, current_device_); + } void set_device(Device device) { current_device_ = detail::normalize_cuda_device(device); - guard_.SetDevice(current_device_._PD_GetInner()); + guard_->SetDevice(current_device_._PD_GetInner()); } void reset_device(Device device) { set_device(device); } void set_index(DeviceIndex device_index) { current_device_ = Device(kCUDA, device_index); - guard_.SetDeviceIndex(device_index); + guard_->SetDeviceIndex(device_index); } Device original_device() const { return original_device_; } @@ -84,7 +101,7 @@ struct CUDAGuard { private: Device original_device_; Device current_device_; - paddle::platform::CUDADeviceGuard guard_; + std::optional guard_; }; struct OptionalCUDAGuard { @@ -107,7 +124,7 @@ struct OptionalCUDAGuard { OptionalCUDAGuard(OptionalCUDAGuard&& other) = delete; OptionalCUDAGuard& operator=(OptionalCUDAGuard&& other) = delete; - ~OptionalCUDAGuard() = default; + ~OptionalCUDAGuard() { reset(); } void set_device(Device device) { const Device normalized = detail::normalize_cuda_device(device); @@ -130,6 +147,7 @@ struct OptionalCUDAGuard { void reset() { guard_.reset(); + detail::restore_cuda_device(original_device_, current_device_); original_device_.reset(); current_device_.reset(); } diff --git a/test/cpp/compat/ATen_CUDAContext_test.cc b/test/cpp/compat/ATen_CUDAContext_test.cc index 24223733059b58..cd3e6771ce0428 100644 --- a/test/cpp/compat/ATen_CUDAContext_test.cc +++ b/test/cpp/compat/ATen_CUDAContext_test.cc @@ -89,14 +89,40 @@ TEST(CUDAFunctionsTest, TorchSynchronizePreservesCurrentDevice) { return; } - constexpr c10::DeviceIndex current_device = 0; - constexpr c10::DeviceIndex other_device = 1; - c10::cuda::CUDAGuard guard(current_device); + constexpr int current_device = 0; + constexpr int other_device = 1; + c10::cuda::CUDAGuard guard(static_cast(current_device)); ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), current_device); ASSERT_NO_THROW(torch::cuda::synchronize(other_device)); EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), current_device); } + +TEST(CUDAFunctionsTest, CUDAGuardRestoresOriginalDeviceAfterMultipleSwitches) { + if (!torch::cuda::is_available()) { + return; + } + if (torch::cuda::device_count() < 2) { + return; + } + + constexpr int original_device = 0; + constexpr int intermediate_device = 1; + phi::backends::gpu::SetDeviceId(original_device); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + + { + c10::cuda::CUDAGuard guard( + static_cast(intermediate_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), intermediate_device); + guard.set_index(static_cast(original_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + guard.set_index(static_cast(intermediate_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), intermediate_device); + } + + EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); +} #endif // --------------------------------------------------------------------------- From 1ffad127f1831641c4f7808dd836063343e24311 Mon Sep 17 00:00:00 2001 From: youge325 Date: Thu, 23 Apr 2026 16:14:02 +0800 Subject: [PATCH 05/13] fix --- .../api/include/compat/c10/cuda/CUDAGuard.h | 61 +++++++++---------- test/cpp/compat/ATen_CUDAContext_test.cc | 54 ++++++++++++++++ 2 files changed, 82 insertions(+), 33 deletions(-) diff --git a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h index 9efc278dc0c2df..c99578797f9fec 100644 --- a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h +++ b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h @@ -23,7 +23,7 @@ #include -#include "paddle/phi/core/platform/cuda_device_guard.h" +#include "paddle/phi/backends/gpu/gpu_info.h" namespace c10::cuda { @@ -39,20 +39,6 @@ inline Device normalize_cuda_device(Device device) { : current_cuda_device(); } -inline void restore_cuda_device(const Device& original_device, - const Device& current_device) { - if (original_device.index() != current_device.index()) { - phi::backends::gpu::SetDeviceId(static_cast(original_device.index())); - } -} - -inline void restore_cuda_device(const std::optional& original_device, - const std::optional& current_device) { - if (original_device.has_value() && current_device.has_value()) { - restore_cuda_device(*original_device, *current_device); - } -} - } // namespace detail struct CUDAGuard { @@ -60,15 +46,13 @@ struct CUDAGuard { explicit CUDAGuard(DeviceIndex device_index) : original_device_(detail::current_cuda_device()), - current_device_(original_device_), - guard_(std::in_place) { + current_device_(original_device_) { set_index(device_index); } explicit CUDAGuard(Device device) : original_device_(detail::current_cuda_device()), - current_device_(original_device_), - guard_(std::in_place) { + current_device_(original_device_) { set_device(device); } @@ -78,20 +62,27 @@ struct CUDAGuard { CUDAGuard(CUDAGuard&& other) = delete; CUDAGuard& operator=(CUDAGuard&& other) = delete; ~CUDAGuard() { - guard_.reset(); - detail::restore_cuda_device(original_device_, current_device_); + if (original_device_.index() != current_device_.index()) { + phi::backends::gpu::SetDeviceId( + static_cast(original_device_.index())); + } } void set_device(Device device) { - current_device_ = detail::normalize_cuda_device(device); - guard_->SetDevice(current_device_._PD_GetInner()); + const Device normalized = detail::normalize_cuda_device(device); + if (normalized.index() != current_device_.index()) { + phi::backends::gpu::SetDeviceId(static_cast(normalized.index())); + current_device_ = normalized; + } } void reset_device(Device device) { set_device(device); } void set_index(DeviceIndex device_index) { - current_device_ = Device(kCUDA, device_index); - guard_->SetDeviceIndex(device_index); + if (current_device_.index() != device_index) { + phi::backends::gpu::SetDeviceId(static_cast(device_index)); + current_device_ = Device(kCUDA, device_index); + } } Device original_device() const { return original_device_; } @@ -101,7 +92,6 @@ struct CUDAGuard { private: Device original_device_; Device current_device_; - std::optional guard_; }; struct OptionalCUDAGuard { @@ -129,7 +119,9 @@ struct OptionalCUDAGuard { void set_device(Device device) { const Device normalized = detail::normalize_cuda_device(device); init_if_needed(); - guard_->SetDevice(normalized._PD_GetInner()); + if (normalized.index() != current_device_->index()) { + phi::backends::gpu::SetDeviceId(static_cast(normalized.index())); + } current_device_ = normalized; } @@ -137,7 +129,9 @@ struct OptionalCUDAGuard { void set_index(DeviceIndex device_index) { init_if_needed(); - guard_->SetDeviceIndex(device_index); + if (device_index != current_device_->index()) { + phi::backends::gpu::SetDeviceId(static_cast(device_index)); + } current_device_ = Device(kCUDA, device_index); } @@ -146,24 +140,25 @@ struct OptionalCUDAGuard { std::optional current_device() const { return current_device_; } void reset() { - guard_.reset(); - detail::restore_cuda_device(original_device_, current_device_); + if (original_device_.has_value() && current_device_.has_value() && + original_device_->index() != current_device_->index()) { + phi::backends::gpu::SetDeviceId( + static_cast(original_device_->index())); + } original_device_.reset(); current_device_.reset(); } private: void init_if_needed() { - if (!guard_.has_value()) { + if (!original_device_.has_value()) { original_device_ = detail::current_cuda_device(); current_device_ = original_device_; - guard_.emplace(); } } std::optional original_device_; std::optional current_device_; - std::optional guard_; }; } // namespace c10::cuda diff --git a/test/cpp/compat/ATen_CUDAContext_test.cc b/test/cpp/compat/ATen_CUDAContext_test.cc index cd3e6771ce0428..dac0a8d87e48fa 100644 --- a/test/cpp/compat/ATen_CUDAContext_test.cc +++ b/test/cpp/compat/ATen_CUDAContext_test.cc @@ -123,6 +123,60 @@ TEST(CUDAFunctionsTest, CUDAGuardRestoresOriginalDeviceAfterMultipleSwitches) { EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); } + +TEST(CUDAFunctionsTest, + CUDAGuardRestoresOriginalDeviceAfterReturnToOriginalThenExit) { + if (!torch::cuda::is_available()) { + return; + } + if (torch::cuda::device_count() < 2) { + return; + } + + constexpr int original_device = 0; + constexpr int intermediate_device = 1; + phi::backends::gpu::SetDeviceId(original_device); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + + { + c10::cuda::CUDAGuard guard( + static_cast(intermediate_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), intermediate_device); + + guard.set_index(static_cast(original_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + } + + EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); +} + +TEST(CUDAFunctionsTest, + OptionalCUDAGuardResetRestoresOriginalDeviceAfterReturnToOriginal) { + if (!torch::cuda::is_available()) { + return; + } + if (torch::cuda::device_count() < 2) { + return; + } + + constexpr int original_device = 0; + constexpr int intermediate_device = 1; + phi::backends::gpu::SetDeviceId(original_device); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + + c10::cuda::OptionalCUDAGuard guard; + guard.set_index(static_cast(intermediate_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), intermediate_device); + + guard.set_index(static_cast(original_device)); + ASSERT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + + guard.reset(); + + EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), original_device); + EXPECT_FALSE(guard.original_device().has_value()); + EXPECT_FALSE(guard.current_device().has_value()); +} #endif // --------------------------------------------------------------------------- From 99029cdacb9118cb1dedcfebb4aec0976c8b9d2d Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Apr 2026 17:20:49 +0800 Subject: [PATCH 06/13] Add ABI symbol compatibility check --- ci/static_check.sh | 17 ++ tools/check_abi_compatibility.py | 374 ++++++++++++++++++++++++++ tools/test_check_abi_compatibility.py | 188 +++++++++++++ 3 files changed, 579 insertions(+) create mode 100644 tools/check_abi_compatibility.py create mode 100644 tools/test_check_abi_compatibility.py diff --git a/ci/static_check.sh b/ci/static_check.sh index 9682a6ae48da47..98cb60220995c5 100644 --- a/ci/static_check.sh +++ b/ci/static_check.sh @@ -149,6 +149,21 @@ function exec_samplecode_checking() { fi } +function exec_abi_compatibility_check() { + if [ "$(uname -s)" != "Linux" ]; then + echo "Skip ABI compatibility check on non-Linux platform." + return + fi + + python ${PADDLE_ROOT}/tools/check_abi_compatibility.py \ + --base-wheel "${PADDLE_ROOT}/build/dev_whl/*.whl" \ + --pr-wheel "${PADDLE_ROOT}/build/pr_whl/*.whl" + abi_check_error=$? + if [ "$abi_check_error" != "0" ]; then + exit $abi_check_error + fi +} + export PATH=/usr/local/python3.10.0/bin:/usr/local/python3.10.0/include:/usr/local/bin:${PATH} echo "export PATH=${PATH}" >> ~/.bashrc export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH @@ -158,6 +173,8 @@ ln -sf $(which python${PY_VERSION}) /usr/bin/python ln -sf $(which pip${PY_VERSION}) /usr/local/bin/pip mkdir -p /home/data/cfs/.ccache/static-check +exec_abi_compatibility_check + pip config set global.cache-dir "/home/data/cfs/.cache/pip" pip install --upgrade pip 1>nul pip install -r "${work_dir}/python/requirements.txt" 1>nul diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py new file mode 100644 index 00000000000000..d7bc3497595590 --- /dev/null +++ b/tools/check_abi_compatibility.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python + +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check Linux wheel ABI compatibility by comparing protected ELF symbols. + +The check is intentionally one-way: symbols added by a PR are allowed, while +protected symbols present in the base wheel must still exist in the PR wheel. +""" + +from __future__ import annotations + +import argparse +import glob +import os +import shutil +import subprocess +import sys +import tempfile +import zipfile +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterable + +WHEEL_LIBRARY_PATHS = ( + "paddle/base/libpaddle.so", + "paddle/libs/libphi.so", + "paddle/libs/libphi_core.so", + "paddle/libs/libphi_gpu.so", +) + +DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} + +PROTECTED_CXX_PREFIXES = ( + "phi::", + "paddle::", + "c10::", + "at::", + "torch::", +) + +PROTECTED_C_SYMBOL_PREFIXES = ( + "PD_", + "Paddle", + "PyInit_", + "paddle_", +) + +PROTECTED_MANGLED_CXX_PREFIXES = ( + "_ZN2at", + "_ZN3c10", + "_ZN3phi", + "_ZN5torch", + "_ZN6paddle", +) + + +@dataclass(frozen=True) +class DynamicSymbol: + name: str + symbol_type: str + bind: str + section: str + demangled_name: str + + +@dataclass(frozen=True) +class RemovedSymbol: + library: str + name: str + demangled_name: str + + +@dataclass(frozen=True) +class MissingLibrary: + library: str + + +def strip_elf_symbol_version(symbol_name: str) -> str: + if "@@" in symbol_name: + return symbol_name.split("@@", 1)[0] + if "@" in symbol_name: + return symbol_name.split("@", 1)[0] + return symbol_name + + +def parse_readelf_dynamic_symbols(readelf_output: str) -> list[DynamicSymbol]: + symbols = [] + for line in readelf_output.splitlines(): + fields = line.split() + if len(fields) < 8 or not fields[0].endswith(":"): + continue + symbol_type = fields[3] + bind = fields[4] + section = fields[6] + name = fields[7] + if ( + bind != "GLOBAL" + or section == "UND" + or symbol_type not in DEFINED_DYNAMIC_SYMBOL_TYPES + ): + continue + symbols.append( + DynamicSymbol( + name=name, + symbol_type=symbol_type, + bind=bind, + section=section, + demangled_name=strip_elf_symbol_version(name), + ) + ) + return symbols + + +def demangle_symbol_names(symbol_names: Iterable[str]) -> dict[str, str]: + unique_names = sorted( + {strip_elf_symbol_version(name) for name in symbol_names} + ) + if not unique_names: + return {} + cxxfilt = shutil.which("c++filt") + if cxxfilt is None: + return {name: name for name in unique_names} + + try: + result = subprocess.run( + [cxxfilt], + input="\n".join(unique_names), + text=True, + capture_output=True, + check=True, + ) + except (OSError, subprocess.CalledProcessError): + return {name: name for name in unique_names} + + demangled = result.stdout.splitlines() + if len(demangled) != len(unique_names): + return {name: name for name in unique_names} + return dict(zip(unique_names, demangled)) + + +def attach_demangled_names( + symbols: Iterable[DynamicSymbol], +) -> list[DynamicSymbol]: + symbol_list = list(symbols) + demangled_names = demangle_symbol_names( + symbol.name for symbol in symbol_list + ) + return [ + DynamicSymbol( + name=symbol.name, + symbol_type=symbol.symbol_type, + bind=symbol.bind, + section=symbol.section, + demangled_name=demangled_names.get( + strip_elf_symbol_version(symbol.name), symbol.demangled_name + ), + ) + for symbol in symbol_list + ] + + +def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: + demangled = symbol.demangled_name + if demangled.startswith(PROTECTED_CXX_PREFIXES): + return True + + raw_name = strip_elf_symbol_version(symbol.name) + return raw_name.startswith( + PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES + ) + + +def protected_symbols_by_name( + symbols: Iterable[DynamicSymbol], +) -> dict[str, DynamicSymbol]: + return { + symbol.name: symbol + for symbol in symbols + if is_protected_paddle_abi_symbol(symbol) + } + + +def read_dynamic_symbols(library_path: str) -> list[DynamicSymbol]: + try: + result = subprocess.run( + ["readelf", "--dyn-syms", "-W", library_path], + text=True, + capture_output=True, + check=True, + ) + except FileNotFoundError as exc: + raise RuntimeError("readelf is required to check ABI symbols") from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + f"Failed to read dynamic symbols from {library_path}:\n{exc.stderr}" + ) from exc + + return attach_demangled_names(parse_readelf_dynamic_symbols(result.stdout)) + + +def extract_wheel_libraries( + wheel_path: str, library_paths: Iterable[str], output_dir: str +) -> dict[str, str]: + extracted_libraries = {} + with zipfile.ZipFile(wheel_path) as wheel: + wheel_entries = set(wheel.namelist()) + for library_path in library_paths: + if library_path not in wheel_entries: + continue + extracted_path = wheel.extract(library_path, output_dir) + extracted_libraries[library_path] = extracted_path + return extracted_libraries + + +def compare_library_symbols( + library: str, + base_symbols: Iterable[DynamicSymbol] | None, + pr_symbols: Iterable[DynamicSymbol] | None, +) -> list[RemovedSymbol | MissingLibrary]: + if base_symbols is None: + return [] + if pr_symbols is None: + return [MissingLibrary(library=library)] + + base_protected_symbols = protected_symbols_by_name(base_symbols) + pr_protected_symbols = protected_symbols_by_name(pr_symbols) + removed_names = sorted( + set(base_protected_symbols) - set(pr_protected_symbols) + ) + return [ + RemovedSymbol( + library=library, + name=name, + demangled_name=base_protected_symbols[name].demangled_name, + ) + for name in removed_names + ] + + +def resolve_wheel_path(pattern: str, label: str) -> str: + matches = sorted(glob.glob(pattern)) + if len(matches) != 1: + raise RuntimeError( + f"Expected exactly one {label} wheel matching {pattern}, " + f"but found {len(matches)}: {matches}" + ) + return matches[0] + + +def compare_wheel_abi( + base_wheel: str, pr_wheel: str, library_paths: Iterable[str] +) -> list[RemovedSymbol | MissingLibrary]: + with tempfile.TemporaryDirectory(prefix="paddle_abi_check_") as temp_dir: + base_dir = os.path.join(temp_dir, "base") + pr_dir = os.path.join(temp_dir, "pr") + base_libraries = extract_wheel_libraries( + base_wheel, library_paths, base_dir + ) + pr_libraries = extract_wheel_libraries(pr_wheel, library_paths, pr_dir) + + issues: list[RemovedSymbol | MissingLibrary] = [] + for library in library_paths: + base_path = base_libraries.get(library) + pr_path = pr_libraries.get(library) + base_symbols = ( + read_dynamic_symbols(base_path) + if base_path is not None + else None + ) + pr_symbols = ( + read_dynamic_symbols(pr_path) if pr_path is not None else None + ) + issues.extend( + compare_library_symbols(library, base_symbols, pr_symbols) + ) + return issues + + +def format_issues( + issues: Iterable[RemovedSymbol | MissingLibrary], max_report: int +) -> str: + issue_list = list(issues) + lines = [ + "ABI compatibility check failed.", + "The PR wheel removed protected dynamic symbols that exist in the base " + "wheel. Removing these symbols can break downstream wheels or shared " + "libraries compiled against the base branch.", + "", + ] + for issue in issue_list[:max_report]: + if isinstance(issue, MissingLibrary): + lines.extend( + [ + f"Library: {issue.library}", + " PR wheel is missing this library, but the base wheel " + "contains it.", + "", + ] + ) + else: + lines.extend( + [ + f"Library: {issue.library}", + f" Raw symbol: {issue.name}", + f" Demangled: {issue.demangled_name}", + "", + ] + ) + + omitted_count = len(issue_list) - max_report + if omitted_count > 0: + lines.append(f"... omitted {omitted_count} additional removed symbols.") + return "\n".join(lines) + + +def parse_args(argv: list[str]) -> argparse.Namespace: + paddle_root = os.environ.get("PADDLE_ROOT", os.getcwd()) + parser = argparse.ArgumentParser( + description="Check Linux wheel ABI compatibility for Paddle symbols." + ) + parser.add_argument( + "--base-wheel", + default=os.path.join(paddle_root, "build/dev_whl/*.whl"), + help="Base branch wheel path or glob pattern.", + ) + parser.add_argument( + "--pr-wheel", + default=os.path.join(paddle_root, "build/pr_whl/*.whl"), + help="PR wheel path or glob pattern.", + ) + parser.add_argument( + "--max-report", + type=int, + default=200, + help="Maximum number of ABI issues to print.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(sys.argv[1:] if argv is None else argv) + try: + base_wheel = resolve_wheel_path(args.base_wheel, "base") + pr_wheel = resolve_wheel_path(args.pr_wheel, "PR") + issues = compare_wheel_abi(base_wheel, pr_wheel, WHEEL_LIBRARY_PATHS) + except RuntimeError as exc: + print(f"ABI compatibility check failed: {exc}", file=sys.stderr) + return 1 + + if issues: + print(format_issues(issues, args.max_report), file=sys.stderr) + return 1 + + print("ABI compatibility check passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py new file mode 100644 index 00000000000000..5053fd65f17057 --- /dev/null +++ b/tools/test_check_abi_compatibility.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python + +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +try: + from check_abi_compatibility import ( + DynamicSymbol, + MissingLibrary, + RemovedSymbol, + compare_library_symbols, + is_protected_paddle_abi_symbol, + parse_readelf_dynamic_symbols, + ) +except ModuleNotFoundError: + from tools.check_abi_compatibility import ( + DynamicSymbol, + MissingLibrary, + RemovedSymbol, + compare_library_symbols, + is_protected_paddle_abi_symbol, + parse_readelf_dynamic_symbols, + ) + + +def make_symbol(name, demangled_name=None, bind="GLOBAL", section="12"): + return DynamicSymbol( + name=name, + symbol_type="FUNC", + bind=bind, + section=section, + demangled_name=demangled_name or name, + ) + + +class TestParseReadelfDynamicSymbols(unittest.TestCase): + def test_ignores_weak_undefined_and_local_symbols(self): + readelf_output = """ +Symbol table '.dynsym' contains 5 entries: + Num: Value Size Type Bind Vis Ndx Name + 1: 0000000000001000 42 FUNC GLOBAL DEFAULT 12 _ZN3c1017get_default_dtypeEv + 2: 0000000000001010 42 FUNC WEAK DEFAULT 12 _ZN3c104weakEv + 3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZN3c107missingEv + 4: 0000000000001020 42 FUNC LOCAL DEFAULT 12 _ZN3c105localEv + 5: 0000000000001030 8 OBJECT GLOBAL DEFAULT 13 _ZN3phi3barE +""" + symbols = parse_readelf_dynamic_symbols(readelf_output) + self.assertEqual( + [symbol.name for symbol in symbols], + ["_ZN3c1017get_default_dtypeEv", "_ZN3phi3barE"], + ) + + +class TestProtectedSymbols(unittest.TestCase): + def test_detects_protected_cxx_namespaces(self): + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol( + "_ZN3c1017get_default_dtypeEv", + "c10::get_default_dtype()", + ) + ) + ) + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol("_ZN3phi3barEv", "phi::bar()") + ) + ) + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol("_ZN5torch4cuda11synchronizeEv") + ) + ) + + def test_detects_relevant_c_and_python_entrypoints(self): + self.assertTrue( + is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) + ) + self.assertTrue( + is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) + ) + + def test_ignores_third_party_symbols(self): + self.assertFalse( + is_protected_paddle_abi_symbol(make_symbol("XXH32", "XXH32")) + ) + self.assertFalse( + is_protected_paddle_abi_symbol( + make_symbol("_ZN4YAML7EmitterC1Ev", "YAML::Emitter::Emitter()") + ) + ) + + +class TestCompareLibrarySymbols(unittest.TestCase): + def test_added_symbols_do_not_fail(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + pr_symbols = [ + *base_symbols, + make_symbol( + "_ZN3c1017set_default_dtypeEv", "c10::set_default_dtype()" + ), + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, pr_symbols + ) + + self.assertEqual(issues, []) + + def test_removed_protected_symbol_fails(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, [] + ) + + self.assertEqual( + issues, + [ + RemovedSymbol( + library="paddle/libs/libphi_core.so", + name="_ZN3c1017get_default_dtypeEv", + demangled_name="c10::get_default_dtype()", + ) + ], + ) + + def test_removed_third_party_symbol_does_not_fail(self): + base_symbols = [make_symbol("XXH32", "XXH32")] + + issues = compare_library_symbols( + "paddle/base/libpaddle.so", base_symbols, [] + ) + + self.assertEqual(issues, []) + + def test_missing_pr_library_fails_when_base_has_library(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, None + ) + + self.assertEqual( + issues, [MissingLibrary(library="paddle/libs/libphi_core.so")] + ) + + def test_missing_base_library_does_not_fail(self): + pr_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", None, pr_symbols + ) + + self.assertEqual(issues, []) + + +if __name__ == "__main__": + unittest.main() From 7d054b18f22638b876c2d69138a9e1a92d5b7321 Mon Sep 17 00:00:00 2001 From: youge325 Date: Tue, 28 Apr 2026 16:14:53 +0800 Subject: [PATCH 07/13] Revert "Add ABI symbol compatibility check" This reverts commit 99029cdacb9118cb1dedcfebb4aec0976c8b9d2d. --- ci/static_check.sh | 17 -- tools/check_abi_compatibility.py | 374 -------------------------- tools/test_check_abi_compatibility.py | 188 ------------- 3 files changed, 579 deletions(-) delete mode 100644 tools/check_abi_compatibility.py delete mode 100644 tools/test_check_abi_compatibility.py diff --git a/ci/static_check.sh b/ci/static_check.sh index 98cb60220995c5..9682a6ae48da47 100644 --- a/ci/static_check.sh +++ b/ci/static_check.sh @@ -149,21 +149,6 @@ function exec_samplecode_checking() { fi } -function exec_abi_compatibility_check() { - if [ "$(uname -s)" != "Linux" ]; then - echo "Skip ABI compatibility check on non-Linux platform." - return - fi - - python ${PADDLE_ROOT}/tools/check_abi_compatibility.py \ - --base-wheel "${PADDLE_ROOT}/build/dev_whl/*.whl" \ - --pr-wheel "${PADDLE_ROOT}/build/pr_whl/*.whl" - abi_check_error=$? - if [ "$abi_check_error" != "0" ]; then - exit $abi_check_error - fi -} - export PATH=/usr/local/python3.10.0/bin:/usr/local/python3.10.0/include:/usr/local/bin:${PATH} echo "export PATH=${PATH}" >> ~/.bashrc export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH @@ -173,8 +158,6 @@ ln -sf $(which python${PY_VERSION}) /usr/bin/python ln -sf $(which pip${PY_VERSION}) /usr/local/bin/pip mkdir -p /home/data/cfs/.ccache/static-check -exec_abi_compatibility_check - pip config set global.cache-dir "/home/data/cfs/.cache/pip" pip install --upgrade pip 1>nul pip install -r "${work_dir}/python/requirements.txt" 1>nul diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py deleted file mode 100644 index d7bc3497595590..00000000000000 --- a/tools/check_abi_compatibility.py +++ /dev/null @@ -1,374 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Check Linux wheel ABI compatibility by comparing protected ELF symbols. - -The check is intentionally one-way: symbols added by a PR are allowed, while -protected symbols present in the base wheel must still exist in the PR wheel. -""" - -from __future__ import annotations - -import argparse -import glob -import os -import shutil -import subprocess -import sys -import tempfile -import zipfile -from dataclasses import dataclass -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Iterable - -WHEEL_LIBRARY_PATHS = ( - "paddle/base/libpaddle.so", - "paddle/libs/libphi.so", - "paddle/libs/libphi_core.so", - "paddle/libs/libphi_gpu.so", -) - -DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} - -PROTECTED_CXX_PREFIXES = ( - "phi::", - "paddle::", - "c10::", - "at::", - "torch::", -) - -PROTECTED_C_SYMBOL_PREFIXES = ( - "PD_", - "Paddle", - "PyInit_", - "paddle_", -) - -PROTECTED_MANGLED_CXX_PREFIXES = ( - "_ZN2at", - "_ZN3c10", - "_ZN3phi", - "_ZN5torch", - "_ZN6paddle", -) - - -@dataclass(frozen=True) -class DynamicSymbol: - name: str - symbol_type: str - bind: str - section: str - demangled_name: str - - -@dataclass(frozen=True) -class RemovedSymbol: - library: str - name: str - demangled_name: str - - -@dataclass(frozen=True) -class MissingLibrary: - library: str - - -def strip_elf_symbol_version(symbol_name: str) -> str: - if "@@" in symbol_name: - return symbol_name.split("@@", 1)[0] - if "@" in symbol_name: - return symbol_name.split("@", 1)[0] - return symbol_name - - -def parse_readelf_dynamic_symbols(readelf_output: str) -> list[DynamicSymbol]: - symbols = [] - for line in readelf_output.splitlines(): - fields = line.split() - if len(fields) < 8 or not fields[0].endswith(":"): - continue - symbol_type = fields[3] - bind = fields[4] - section = fields[6] - name = fields[7] - if ( - bind != "GLOBAL" - or section == "UND" - or symbol_type not in DEFINED_DYNAMIC_SYMBOL_TYPES - ): - continue - symbols.append( - DynamicSymbol( - name=name, - symbol_type=symbol_type, - bind=bind, - section=section, - demangled_name=strip_elf_symbol_version(name), - ) - ) - return symbols - - -def demangle_symbol_names(symbol_names: Iterable[str]) -> dict[str, str]: - unique_names = sorted( - {strip_elf_symbol_version(name) for name in symbol_names} - ) - if not unique_names: - return {} - cxxfilt = shutil.which("c++filt") - if cxxfilt is None: - return {name: name for name in unique_names} - - try: - result = subprocess.run( - [cxxfilt], - input="\n".join(unique_names), - text=True, - capture_output=True, - check=True, - ) - except (OSError, subprocess.CalledProcessError): - return {name: name for name in unique_names} - - demangled = result.stdout.splitlines() - if len(demangled) != len(unique_names): - return {name: name for name in unique_names} - return dict(zip(unique_names, demangled)) - - -def attach_demangled_names( - symbols: Iterable[DynamicSymbol], -) -> list[DynamicSymbol]: - symbol_list = list(symbols) - demangled_names = demangle_symbol_names( - symbol.name for symbol in symbol_list - ) - return [ - DynamicSymbol( - name=symbol.name, - symbol_type=symbol.symbol_type, - bind=symbol.bind, - section=symbol.section, - demangled_name=demangled_names.get( - strip_elf_symbol_version(symbol.name), symbol.demangled_name - ), - ) - for symbol in symbol_list - ] - - -def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: - demangled = symbol.demangled_name - if demangled.startswith(PROTECTED_CXX_PREFIXES): - return True - - raw_name = strip_elf_symbol_version(symbol.name) - return raw_name.startswith( - PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES - ) - - -def protected_symbols_by_name( - symbols: Iterable[DynamicSymbol], -) -> dict[str, DynamicSymbol]: - return { - symbol.name: symbol - for symbol in symbols - if is_protected_paddle_abi_symbol(symbol) - } - - -def read_dynamic_symbols(library_path: str) -> list[DynamicSymbol]: - try: - result = subprocess.run( - ["readelf", "--dyn-syms", "-W", library_path], - text=True, - capture_output=True, - check=True, - ) - except FileNotFoundError as exc: - raise RuntimeError("readelf is required to check ABI symbols") from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - f"Failed to read dynamic symbols from {library_path}:\n{exc.stderr}" - ) from exc - - return attach_demangled_names(parse_readelf_dynamic_symbols(result.stdout)) - - -def extract_wheel_libraries( - wheel_path: str, library_paths: Iterable[str], output_dir: str -) -> dict[str, str]: - extracted_libraries = {} - with zipfile.ZipFile(wheel_path) as wheel: - wheel_entries = set(wheel.namelist()) - for library_path in library_paths: - if library_path not in wheel_entries: - continue - extracted_path = wheel.extract(library_path, output_dir) - extracted_libraries[library_path] = extracted_path - return extracted_libraries - - -def compare_library_symbols( - library: str, - base_symbols: Iterable[DynamicSymbol] | None, - pr_symbols: Iterable[DynamicSymbol] | None, -) -> list[RemovedSymbol | MissingLibrary]: - if base_symbols is None: - return [] - if pr_symbols is None: - return [MissingLibrary(library=library)] - - base_protected_symbols = protected_symbols_by_name(base_symbols) - pr_protected_symbols = protected_symbols_by_name(pr_symbols) - removed_names = sorted( - set(base_protected_symbols) - set(pr_protected_symbols) - ) - return [ - RemovedSymbol( - library=library, - name=name, - demangled_name=base_protected_symbols[name].demangled_name, - ) - for name in removed_names - ] - - -def resolve_wheel_path(pattern: str, label: str) -> str: - matches = sorted(glob.glob(pattern)) - if len(matches) != 1: - raise RuntimeError( - f"Expected exactly one {label} wheel matching {pattern}, " - f"but found {len(matches)}: {matches}" - ) - return matches[0] - - -def compare_wheel_abi( - base_wheel: str, pr_wheel: str, library_paths: Iterable[str] -) -> list[RemovedSymbol | MissingLibrary]: - with tempfile.TemporaryDirectory(prefix="paddle_abi_check_") as temp_dir: - base_dir = os.path.join(temp_dir, "base") - pr_dir = os.path.join(temp_dir, "pr") - base_libraries = extract_wheel_libraries( - base_wheel, library_paths, base_dir - ) - pr_libraries = extract_wheel_libraries(pr_wheel, library_paths, pr_dir) - - issues: list[RemovedSymbol | MissingLibrary] = [] - for library in library_paths: - base_path = base_libraries.get(library) - pr_path = pr_libraries.get(library) - base_symbols = ( - read_dynamic_symbols(base_path) - if base_path is not None - else None - ) - pr_symbols = ( - read_dynamic_symbols(pr_path) if pr_path is not None else None - ) - issues.extend( - compare_library_symbols(library, base_symbols, pr_symbols) - ) - return issues - - -def format_issues( - issues: Iterable[RemovedSymbol | MissingLibrary], max_report: int -) -> str: - issue_list = list(issues) - lines = [ - "ABI compatibility check failed.", - "The PR wheel removed protected dynamic symbols that exist in the base " - "wheel. Removing these symbols can break downstream wheels or shared " - "libraries compiled against the base branch.", - "", - ] - for issue in issue_list[:max_report]: - if isinstance(issue, MissingLibrary): - lines.extend( - [ - f"Library: {issue.library}", - " PR wheel is missing this library, but the base wheel " - "contains it.", - "", - ] - ) - else: - lines.extend( - [ - f"Library: {issue.library}", - f" Raw symbol: {issue.name}", - f" Demangled: {issue.demangled_name}", - "", - ] - ) - - omitted_count = len(issue_list) - max_report - if omitted_count > 0: - lines.append(f"... omitted {omitted_count} additional removed symbols.") - return "\n".join(lines) - - -def parse_args(argv: list[str]) -> argparse.Namespace: - paddle_root = os.environ.get("PADDLE_ROOT", os.getcwd()) - parser = argparse.ArgumentParser( - description="Check Linux wheel ABI compatibility for Paddle symbols." - ) - parser.add_argument( - "--base-wheel", - default=os.path.join(paddle_root, "build/dev_whl/*.whl"), - help="Base branch wheel path or glob pattern.", - ) - parser.add_argument( - "--pr-wheel", - default=os.path.join(paddle_root, "build/pr_whl/*.whl"), - help="PR wheel path or glob pattern.", - ) - parser.add_argument( - "--max-report", - type=int, - default=200, - help="Maximum number of ABI issues to print.", - ) - return parser.parse_args(argv) - - -def main(argv: list[str] | None = None) -> int: - args = parse_args(sys.argv[1:] if argv is None else argv) - try: - base_wheel = resolve_wheel_path(args.base_wheel, "base") - pr_wheel = resolve_wheel_path(args.pr_wheel, "PR") - issues = compare_wheel_abi(base_wheel, pr_wheel, WHEEL_LIBRARY_PATHS) - except RuntimeError as exc: - print(f"ABI compatibility check failed: {exc}", file=sys.stderr) - return 1 - - if issues: - print(format_issues(issues, args.max_report), file=sys.stderr) - return 1 - - print("ABI compatibility check passed.") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py deleted file mode 100644 index 5053fd65f17057..00000000000000 --- a/tools/test_check_abi_compatibility.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -try: - from check_abi_compatibility import ( - DynamicSymbol, - MissingLibrary, - RemovedSymbol, - compare_library_symbols, - is_protected_paddle_abi_symbol, - parse_readelf_dynamic_symbols, - ) -except ModuleNotFoundError: - from tools.check_abi_compatibility import ( - DynamicSymbol, - MissingLibrary, - RemovedSymbol, - compare_library_symbols, - is_protected_paddle_abi_symbol, - parse_readelf_dynamic_symbols, - ) - - -def make_symbol(name, demangled_name=None, bind="GLOBAL", section="12"): - return DynamicSymbol( - name=name, - symbol_type="FUNC", - bind=bind, - section=section, - demangled_name=demangled_name or name, - ) - - -class TestParseReadelfDynamicSymbols(unittest.TestCase): - def test_ignores_weak_undefined_and_local_symbols(self): - readelf_output = """ -Symbol table '.dynsym' contains 5 entries: - Num: Value Size Type Bind Vis Ndx Name - 1: 0000000000001000 42 FUNC GLOBAL DEFAULT 12 _ZN3c1017get_default_dtypeEv - 2: 0000000000001010 42 FUNC WEAK DEFAULT 12 _ZN3c104weakEv - 3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZN3c107missingEv - 4: 0000000000001020 42 FUNC LOCAL DEFAULT 12 _ZN3c105localEv - 5: 0000000000001030 8 OBJECT GLOBAL DEFAULT 13 _ZN3phi3barE -""" - symbols = parse_readelf_dynamic_symbols(readelf_output) - self.assertEqual( - [symbol.name for symbol in symbols], - ["_ZN3c1017get_default_dtypeEv", "_ZN3phi3barE"], - ) - - -class TestProtectedSymbols(unittest.TestCase): - def test_detects_protected_cxx_namespaces(self): - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol( - "_ZN3c1017get_default_dtypeEv", - "c10::get_default_dtype()", - ) - ) - ) - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol("_ZN3phi3barEv", "phi::bar()") - ) - ) - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol("_ZN5torch4cuda11synchronizeEv") - ) - ) - - def test_detects_relevant_c_and_python_entrypoints(self): - self.assertTrue( - is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) - ) - self.assertTrue( - is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) - ) - - def test_ignores_third_party_symbols(self): - self.assertFalse( - is_protected_paddle_abi_symbol(make_symbol("XXH32", "XXH32")) - ) - self.assertFalse( - is_protected_paddle_abi_symbol( - make_symbol("_ZN4YAML7EmitterC1Ev", "YAML::Emitter::Emitter()") - ) - ) - - -class TestCompareLibrarySymbols(unittest.TestCase): - def test_added_symbols_do_not_fail(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - pr_symbols = [ - *base_symbols, - make_symbol( - "_ZN3c1017set_default_dtypeEv", "c10::set_default_dtype()" - ), - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, pr_symbols - ) - - self.assertEqual(issues, []) - - def test_removed_protected_symbol_fails(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, [] - ) - - self.assertEqual( - issues, - [ - RemovedSymbol( - library="paddle/libs/libphi_core.so", - name="_ZN3c1017get_default_dtypeEv", - demangled_name="c10::get_default_dtype()", - ) - ], - ) - - def test_removed_third_party_symbol_does_not_fail(self): - base_symbols = [make_symbol("XXH32", "XXH32")] - - issues = compare_library_symbols( - "paddle/base/libpaddle.so", base_symbols, [] - ) - - self.assertEqual(issues, []) - - def test_missing_pr_library_fails_when_base_has_library(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, None - ) - - self.assertEqual( - issues, [MissingLibrary(library="paddle/libs/libphi_core.so")] - ) - - def test_missing_base_library_does_not_fail(self): - pr_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", None, pr_symbols - ) - - self.assertEqual(issues, []) - - -if __name__ == "__main__": - unittest.main() From bc8bc559fece4c01d3e637e81f8a88072d090a62 Mon Sep 17 00:00:00 2001 From: youge325 Date: Thu, 30 Apr 2026 16:49:16 +0800 Subject: [PATCH 08/13] fix --- paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h | 12 ++++++------ .../compat/torch/csrc/api/include/torch/cuda.cpp | 7 ++++--- test/cpp/compat/ATen_CUDAContext_test.cc | 7 +++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h index c99578797f9fec..e9087aab1d87a8 100644 --- a/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h +++ b/paddle/phi/api/include/compat/c10/cuda/CUDAGuard.h @@ -62,10 +62,9 @@ struct CUDAGuard { CUDAGuard(CUDAGuard&& other) = delete; CUDAGuard& operator=(CUDAGuard&& other) = delete; ~CUDAGuard() { - if (original_device_.index() != current_device_.index()) { - phi::backends::gpu::SetDeviceId( - static_cast(original_device_.index())); - } + // Always restore to original_device_ to handle cases where the device + // was changed outside of this guard, matching PyTorch semantics. + phi::backends::gpu::SetDeviceId(static_cast(original_device_.index())); } void set_device(Device device) { @@ -140,8 +139,9 @@ struct OptionalCUDAGuard { std::optional current_device() const { return current_device_; } void reset() { - if (original_device_.has_value() && current_device_.has_value() && - original_device_->index() != current_device_->index()) { + if (original_device_.has_value()) { + // Always restore to original_device_ to handle external device changes. + // This matches PyTorch OptionalDeviceGuard semantics. phi::backends::gpu::SetDeviceId( static_cast(original_device_->index())); } diff --git a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp index fe6fa4c4be1f47..8e955aedc08190 100644 --- a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp +++ b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp @@ -39,9 +39,10 @@ bool is_available() { return cuda::device_count() > 0; } void synchronize(int64_t device_index) { TORCH_CHECK(is_available(), "No CUDA GPUs are available"); auto num_gpus = cuda::device_count(); - TORCH_CHECK(device_index < 0 || device_index < num_gpus, - "Device index out of range: ", - device_index); + TORCH_CHECK( + device_index == -1 || (device_index >= 0 && device_index < num_gpus), + "Device index out of range: ", + device_index); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) // Match PyTorch semantics: // 1. `device_index == -1` means "current CUDA device". diff --git a/test/cpp/compat/ATen_CUDAContext_test.cc b/test/cpp/compat/ATen_CUDAContext_test.cc index dac0a8d87e48fa..a8373bb99f24da 100644 --- a/test/cpp/compat/ATen_CUDAContext_test.cc +++ b/test/cpp/compat/ATen_CUDAContext_test.cc @@ -98,6 +98,13 @@ TEST(CUDAFunctionsTest, TorchSynchronizePreservesCurrentDevice) { EXPECT_EQ(phi::backends::gpu::GetCurrentDeviceId(), current_device); } +TEST(CUDAFunctionsTest, SynchronizeRejectsInvalidNegativeDevice) { + if (!torch::cuda::is_available()) { + return; + } + ASSERT_THROW(torch::cuda::synchronize(-2), std::exception); +} + TEST(CUDAFunctionsTest, CUDAGuardRestoresOriginalDeviceAfterMultipleSwitches) { if (!torch::cuda::is_available()) { return; From 7c31b5d5800276d07c458d965d0ec04e9294b1f1 Mon Sep 17 00:00:00 2001 From: youge325 Date: Fri, 1 May 2026 21:09:03 +0800 Subject: [PATCH 09/13] fix(cuda): align device_count CPU-only with PyTorch In CPU-only builds, c10::cuda::device_count() / torch::cuda::device_count() previously threw "Cannot visit device count" via PADDLE_THROW. This made is_available() unsafe to call and caused synchronize() to surface the wrong error message. Match PyTorch semantics: return 0 in CPU-only builds so that is_available() returns false and synchronize() falls through the existing TORCH_CHECK(is_available(), "No CUDA GPUs are available") guard. The unreachable #else PADDLE_THROW branch in synchronize() is removed. Adds three CPU-only regression tests: - DeviceCountReturnsZeroInCpuOnly - IsAvailableFalseAndNoThrowInCpuOnly - SynchronizeReportsNoGpuMessageInCpuOnly Addresses Copilot review comment 3168115261. --- .../torch/csrc/api/include/torch/cuda.cpp | 10 ++-- test/cpp/compat/ATen_CUDAContext_test.cc | 48 +++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp index 8e955aedc08190..95efa28a63ad7a 100644 --- a/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp +++ b/paddle/phi/api/include/compat/torch/csrc/api/include/torch/cuda.cpp @@ -29,8 +29,10 @@ c10::DeviceIndex device_count() { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) return phi::backends::gpu::GetGPUDeviceCount(); #else - PADDLE_THROW(common::errors::Unavailable( - "Paddle is not compiled with CUDA. Cannot visit device count.")); + // Match PyTorch c10::cuda::device_count(): return 0 in CPU-only builds so + // that is_available() and the pre-checks of synchronize() degrade gracefully + // through a single, consistent "No CUDA GPUs are available" error path. + return 0; #endif } @@ -51,10 +53,8 @@ void synchronize(int64_t device_index) { const c10::cuda::CUDAGuard device_guard(c10::Device( c10::DeviceType::CUDA, static_cast(device_index))); c10::cuda::device_synchronize(); -#else - PADDLE_THROW(common::errors::Unavailable( - "Paddle is not compiled with CUDA. Cannot visit device synchronize.")); #endif + // CPU-only builds are already rejected above by the is_available() check. } } // namespace torch::cuda diff --git a/test/cpp/compat/ATen_CUDAContext_test.cc b/test/cpp/compat/ATen_CUDAContext_test.cc index a8373bb99f24da..642698cac2c923 100644 --- a/test/cpp/compat/ATen_CUDAContext_test.cc +++ b/test/cpp/compat/ATen_CUDAContext_test.cc @@ -59,6 +59,27 @@ TEST(CUDAFunctionsTest, DeviceSynchronize) { #endif } +// CPU-only: torch::cuda::synchronize must report "No CUDA GPUs are available" +// rather than the older "Cannot visit device count" produced by device_count(). +// Matches PyTorch behavior where device_count() returns 0 in CPU-only builds +// and the synchronize() pre-check is the single source of the GPU-missing +// error message. +TEST(CUDAFunctionsTest, SynchronizeReportsNoGpuMessageInCpuOnly) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + // Only relevant in CPU-only builds + return; +#else + try { + torch::cuda::synchronize(); + FAIL() << "expected exception"; + } catch (const std::exception& e) { + const std::string msg = e.what(); + EXPECT_NE(msg.find("No CUDA GPUs are available"), std::string::npos) << msg; + EXPECT_EQ(msg.find("Cannot visit device count"), std::string::npos) << msg; + } +#endif +} + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) TEST(CUDAFunctionsTest, StreamSynchronize) { if (!at::cuda::is_available()) { @@ -217,6 +238,33 @@ TEST(CUDAContextLightTest, GetNumGPUs) { #endif } +// CPU-only: device_count() must return 0 instead of throwing, matching the +// PyTorch contract that device_count() is a non-throwing query. +TEST(CUDAContextLightTest, DeviceCountReturnsZeroInCpuOnly) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + // Only relevant in CPU-only builds + return; +#else + ASSERT_NO_THROW({ + EXPECT_EQ(c10::cuda::device_count(), 0); + EXPECT_EQ(torch::cuda::device_count(), 0); + }); +#endif +} + +// CPU-only: is_available() must be false and not throw, matching PyTorch. +TEST(CUDAContextLightTest, IsAvailableFalseAndNoThrowInCpuOnly) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + // Only relevant in CPU-only builds + return; +#else + ASSERT_NO_THROW({ + EXPECT_FALSE(at::cuda::is_available()); + EXPECT_FALSE(torch::cuda::is_available()); + }); +#endif +} + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) // The following tests require CUDA runtime and can only run in CUDA builds From d0bcc46f49ce5979498a951b30de691dbeb037b7 Mon Sep 17 00:00:00 2001 From: youge325 Date: Sat, 25 Apr 2026 17:20:49 +0800 Subject: [PATCH 10/13] Add ABI symbol compatibility check --- ci/static_check.sh | 17 ++ tools/check_abi_compatibility.py | 374 ++++++++++++++++++++++++++ tools/test_check_abi_compatibility.py | 188 +++++++++++++ 3 files changed, 579 insertions(+) create mode 100644 tools/check_abi_compatibility.py create mode 100644 tools/test_check_abi_compatibility.py diff --git a/ci/static_check.sh b/ci/static_check.sh index 9682a6ae48da47..98cb60220995c5 100644 --- a/ci/static_check.sh +++ b/ci/static_check.sh @@ -149,6 +149,21 @@ function exec_samplecode_checking() { fi } +function exec_abi_compatibility_check() { + if [ "$(uname -s)" != "Linux" ]; then + echo "Skip ABI compatibility check on non-Linux platform." + return + fi + + python ${PADDLE_ROOT}/tools/check_abi_compatibility.py \ + --base-wheel "${PADDLE_ROOT}/build/dev_whl/*.whl" \ + --pr-wheel "${PADDLE_ROOT}/build/pr_whl/*.whl" + abi_check_error=$? + if [ "$abi_check_error" != "0" ]; then + exit $abi_check_error + fi +} + export PATH=/usr/local/python3.10.0/bin:/usr/local/python3.10.0/include:/usr/local/bin:${PATH} echo "export PATH=${PATH}" >> ~/.bashrc export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH @@ -158,6 +173,8 @@ ln -sf $(which python${PY_VERSION}) /usr/bin/python ln -sf $(which pip${PY_VERSION}) /usr/local/bin/pip mkdir -p /home/data/cfs/.ccache/static-check +exec_abi_compatibility_check + pip config set global.cache-dir "/home/data/cfs/.cache/pip" pip install --upgrade pip 1>nul pip install -r "${work_dir}/python/requirements.txt" 1>nul diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py new file mode 100644 index 00000000000000..d7bc3497595590 --- /dev/null +++ b/tools/check_abi_compatibility.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python + +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check Linux wheel ABI compatibility by comparing protected ELF symbols. + +The check is intentionally one-way: symbols added by a PR are allowed, while +protected symbols present in the base wheel must still exist in the PR wheel. +""" + +from __future__ import annotations + +import argparse +import glob +import os +import shutil +import subprocess +import sys +import tempfile +import zipfile +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterable + +WHEEL_LIBRARY_PATHS = ( + "paddle/base/libpaddle.so", + "paddle/libs/libphi.so", + "paddle/libs/libphi_core.so", + "paddle/libs/libphi_gpu.so", +) + +DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} + +PROTECTED_CXX_PREFIXES = ( + "phi::", + "paddle::", + "c10::", + "at::", + "torch::", +) + +PROTECTED_C_SYMBOL_PREFIXES = ( + "PD_", + "Paddle", + "PyInit_", + "paddle_", +) + +PROTECTED_MANGLED_CXX_PREFIXES = ( + "_ZN2at", + "_ZN3c10", + "_ZN3phi", + "_ZN5torch", + "_ZN6paddle", +) + + +@dataclass(frozen=True) +class DynamicSymbol: + name: str + symbol_type: str + bind: str + section: str + demangled_name: str + + +@dataclass(frozen=True) +class RemovedSymbol: + library: str + name: str + demangled_name: str + + +@dataclass(frozen=True) +class MissingLibrary: + library: str + + +def strip_elf_symbol_version(symbol_name: str) -> str: + if "@@" in symbol_name: + return symbol_name.split("@@", 1)[0] + if "@" in symbol_name: + return symbol_name.split("@", 1)[0] + return symbol_name + + +def parse_readelf_dynamic_symbols(readelf_output: str) -> list[DynamicSymbol]: + symbols = [] + for line in readelf_output.splitlines(): + fields = line.split() + if len(fields) < 8 or not fields[0].endswith(":"): + continue + symbol_type = fields[3] + bind = fields[4] + section = fields[6] + name = fields[7] + if ( + bind != "GLOBAL" + or section == "UND" + or symbol_type not in DEFINED_DYNAMIC_SYMBOL_TYPES + ): + continue + symbols.append( + DynamicSymbol( + name=name, + symbol_type=symbol_type, + bind=bind, + section=section, + demangled_name=strip_elf_symbol_version(name), + ) + ) + return symbols + + +def demangle_symbol_names(symbol_names: Iterable[str]) -> dict[str, str]: + unique_names = sorted( + {strip_elf_symbol_version(name) for name in symbol_names} + ) + if not unique_names: + return {} + cxxfilt = shutil.which("c++filt") + if cxxfilt is None: + return {name: name for name in unique_names} + + try: + result = subprocess.run( + [cxxfilt], + input="\n".join(unique_names), + text=True, + capture_output=True, + check=True, + ) + except (OSError, subprocess.CalledProcessError): + return {name: name for name in unique_names} + + demangled = result.stdout.splitlines() + if len(demangled) != len(unique_names): + return {name: name for name in unique_names} + return dict(zip(unique_names, demangled)) + + +def attach_demangled_names( + symbols: Iterable[DynamicSymbol], +) -> list[DynamicSymbol]: + symbol_list = list(symbols) + demangled_names = demangle_symbol_names( + symbol.name for symbol in symbol_list + ) + return [ + DynamicSymbol( + name=symbol.name, + symbol_type=symbol.symbol_type, + bind=symbol.bind, + section=symbol.section, + demangled_name=demangled_names.get( + strip_elf_symbol_version(symbol.name), symbol.demangled_name + ), + ) + for symbol in symbol_list + ] + + +def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: + demangled = symbol.demangled_name + if demangled.startswith(PROTECTED_CXX_PREFIXES): + return True + + raw_name = strip_elf_symbol_version(symbol.name) + return raw_name.startswith( + PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES + ) + + +def protected_symbols_by_name( + symbols: Iterable[DynamicSymbol], +) -> dict[str, DynamicSymbol]: + return { + symbol.name: symbol + for symbol in symbols + if is_protected_paddle_abi_symbol(symbol) + } + + +def read_dynamic_symbols(library_path: str) -> list[DynamicSymbol]: + try: + result = subprocess.run( + ["readelf", "--dyn-syms", "-W", library_path], + text=True, + capture_output=True, + check=True, + ) + except FileNotFoundError as exc: + raise RuntimeError("readelf is required to check ABI symbols") from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + f"Failed to read dynamic symbols from {library_path}:\n{exc.stderr}" + ) from exc + + return attach_demangled_names(parse_readelf_dynamic_symbols(result.stdout)) + + +def extract_wheel_libraries( + wheel_path: str, library_paths: Iterable[str], output_dir: str +) -> dict[str, str]: + extracted_libraries = {} + with zipfile.ZipFile(wheel_path) as wheel: + wheel_entries = set(wheel.namelist()) + for library_path in library_paths: + if library_path not in wheel_entries: + continue + extracted_path = wheel.extract(library_path, output_dir) + extracted_libraries[library_path] = extracted_path + return extracted_libraries + + +def compare_library_symbols( + library: str, + base_symbols: Iterable[DynamicSymbol] | None, + pr_symbols: Iterable[DynamicSymbol] | None, +) -> list[RemovedSymbol | MissingLibrary]: + if base_symbols is None: + return [] + if pr_symbols is None: + return [MissingLibrary(library=library)] + + base_protected_symbols = protected_symbols_by_name(base_symbols) + pr_protected_symbols = protected_symbols_by_name(pr_symbols) + removed_names = sorted( + set(base_protected_symbols) - set(pr_protected_symbols) + ) + return [ + RemovedSymbol( + library=library, + name=name, + demangled_name=base_protected_symbols[name].demangled_name, + ) + for name in removed_names + ] + + +def resolve_wheel_path(pattern: str, label: str) -> str: + matches = sorted(glob.glob(pattern)) + if len(matches) != 1: + raise RuntimeError( + f"Expected exactly one {label} wheel matching {pattern}, " + f"but found {len(matches)}: {matches}" + ) + return matches[0] + + +def compare_wheel_abi( + base_wheel: str, pr_wheel: str, library_paths: Iterable[str] +) -> list[RemovedSymbol | MissingLibrary]: + with tempfile.TemporaryDirectory(prefix="paddle_abi_check_") as temp_dir: + base_dir = os.path.join(temp_dir, "base") + pr_dir = os.path.join(temp_dir, "pr") + base_libraries = extract_wheel_libraries( + base_wheel, library_paths, base_dir + ) + pr_libraries = extract_wheel_libraries(pr_wheel, library_paths, pr_dir) + + issues: list[RemovedSymbol | MissingLibrary] = [] + for library in library_paths: + base_path = base_libraries.get(library) + pr_path = pr_libraries.get(library) + base_symbols = ( + read_dynamic_symbols(base_path) + if base_path is not None + else None + ) + pr_symbols = ( + read_dynamic_symbols(pr_path) if pr_path is not None else None + ) + issues.extend( + compare_library_symbols(library, base_symbols, pr_symbols) + ) + return issues + + +def format_issues( + issues: Iterable[RemovedSymbol | MissingLibrary], max_report: int +) -> str: + issue_list = list(issues) + lines = [ + "ABI compatibility check failed.", + "The PR wheel removed protected dynamic symbols that exist in the base " + "wheel. Removing these symbols can break downstream wheels or shared " + "libraries compiled against the base branch.", + "", + ] + for issue in issue_list[:max_report]: + if isinstance(issue, MissingLibrary): + lines.extend( + [ + f"Library: {issue.library}", + " PR wheel is missing this library, but the base wheel " + "contains it.", + "", + ] + ) + else: + lines.extend( + [ + f"Library: {issue.library}", + f" Raw symbol: {issue.name}", + f" Demangled: {issue.demangled_name}", + "", + ] + ) + + omitted_count = len(issue_list) - max_report + if omitted_count > 0: + lines.append(f"... omitted {omitted_count} additional removed symbols.") + return "\n".join(lines) + + +def parse_args(argv: list[str]) -> argparse.Namespace: + paddle_root = os.environ.get("PADDLE_ROOT", os.getcwd()) + parser = argparse.ArgumentParser( + description="Check Linux wheel ABI compatibility for Paddle symbols." + ) + parser.add_argument( + "--base-wheel", + default=os.path.join(paddle_root, "build/dev_whl/*.whl"), + help="Base branch wheel path or glob pattern.", + ) + parser.add_argument( + "--pr-wheel", + default=os.path.join(paddle_root, "build/pr_whl/*.whl"), + help="PR wheel path or glob pattern.", + ) + parser.add_argument( + "--max-report", + type=int, + default=200, + help="Maximum number of ABI issues to print.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(sys.argv[1:] if argv is None else argv) + try: + base_wheel = resolve_wheel_path(args.base_wheel, "base") + pr_wheel = resolve_wheel_path(args.pr_wheel, "PR") + issues = compare_wheel_abi(base_wheel, pr_wheel, WHEEL_LIBRARY_PATHS) + except RuntimeError as exc: + print(f"ABI compatibility check failed: {exc}", file=sys.stderr) + return 1 + + if issues: + print(format_issues(issues, args.max_report), file=sys.stderr) + return 1 + + print("ABI compatibility check passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py new file mode 100644 index 00000000000000..5053fd65f17057 --- /dev/null +++ b/tools/test_check_abi_compatibility.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python + +# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +try: + from check_abi_compatibility import ( + DynamicSymbol, + MissingLibrary, + RemovedSymbol, + compare_library_symbols, + is_protected_paddle_abi_symbol, + parse_readelf_dynamic_symbols, + ) +except ModuleNotFoundError: + from tools.check_abi_compatibility import ( + DynamicSymbol, + MissingLibrary, + RemovedSymbol, + compare_library_symbols, + is_protected_paddle_abi_symbol, + parse_readelf_dynamic_symbols, + ) + + +def make_symbol(name, demangled_name=None, bind="GLOBAL", section="12"): + return DynamicSymbol( + name=name, + symbol_type="FUNC", + bind=bind, + section=section, + demangled_name=demangled_name or name, + ) + + +class TestParseReadelfDynamicSymbols(unittest.TestCase): + def test_ignores_weak_undefined_and_local_symbols(self): + readelf_output = """ +Symbol table '.dynsym' contains 5 entries: + Num: Value Size Type Bind Vis Ndx Name + 1: 0000000000001000 42 FUNC GLOBAL DEFAULT 12 _ZN3c1017get_default_dtypeEv + 2: 0000000000001010 42 FUNC WEAK DEFAULT 12 _ZN3c104weakEv + 3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZN3c107missingEv + 4: 0000000000001020 42 FUNC LOCAL DEFAULT 12 _ZN3c105localEv + 5: 0000000000001030 8 OBJECT GLOBAL DEFAULT 13 _ZN3phi3barE +""" + symbols = parse_readelf_dynamic_symbols(readelf_output) + self.assertEqual( + [symbol.name for symbol in symbols], + ["_ZN3c1017get_default_dtypeEv", "_ZN3phi3barE"], + ) + + +class TestProtectedSymbols(unittest.TestCase): + def test_detects_protected_cxx_namespaces(self): + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol( + "_ZN3c1017get_default_dtypeEv", + "c10::get_default_dtype()", + ) + ) + ) + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol("_ZN3phi3barEv", "phi::bar()") + ) + ) + self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol("_ZN5torch4cuda11synchronizeEv") + ) + ) + + def test_detects_relevant_c_and_python_entrypoints(self): + self.assertTrue( + is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) + ) + self.assertTrue( + is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) + ) + + def test_ignores_third_party_symbols(self): + self.assertFalse( + is_protected_paddle_abi_symbol(make_symbol("XXH32", "XXH32")) + ) + self.assertFalse( + is_protected_paddle_abi_symbol( + make_symbol("_ZN4YAML7EmitterC1Ev", "YAML::Emitter::Emitter()") + ) + ) + + +class TestCompareLibrarySymbols(unittest.TestCase): + def test_added_symbols_do_not_fail(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + pr_symbols = [ + *base_symbols, + make_symbol( + "_ZN3c1017set_default_dtypeEv", "c10::set_default_dtype()" + ), + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, pr_symbols + ) + + self.assertEqual(issues, []) + + def test_removed_protected_symbol_fails(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, [] + ) + + self.assertEqual( + issues, + [ + RemovedSymbol( + library="paddle/libs/libphi_core.so", + name="_ZN3c1017get_default_dtypeEv", + demangled_name="c10::get_default_dtype()", + ) + ], + ) + + def test_removed_third_party_symbol_does_not_fail(self): + base_symbols = [make_symbol("XXH32", "XXH32")] + + issues = compare_library_symbols( + "paddle/base/libpaddle.so", base_symbols, [] + ) + + self.assertEqual(issues, []) + + def test_missing_pr_library_fails_when_base_has_library(self): + base_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, None + ) + + self.assertEqual( + issues, [MissingLibrary(library="paddle/libs/libphi_core.so")] + ) + + def test_missing_base_library_does_not_fail(self): + pr_symbols = [ + make_symbol( + "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", None, pr_symbols + ) + + self.assertEqual(issues, []) + + +if __name__ == "__main__": + unittest.main() From 391d5f2c7f7e7fa07b88a425dc1b575a2fcda34d Mon Sep 17 00:00:00 2001 From: youge325 Date: Thu, 30 Apr 2026 17:58:50 +0800 Subject: [PATCH 11/13] Restrict ABI check to compat symbols --- tools/check_abi_compatibility.py | 27 ++++++---------- tools/test_check_abi_compatibility.py | 45 ++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py index d7bc3497595590..49712204976a11 100644 --- a/tools/check_abi_compatibility.py +++ b/tools/check_abi_compatibility.py @@ -45,27 +45,22 @@ DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} -PROTECTED_CXX_PREFIXES = ( - "phi::", - "paddle::", +PROTECTED_COMPAT_CXX_PREFIXES = ( "c10::", "at::", "torch::", + "caffe2::", ) -PROTECTED_C_SYMBOL_PREFIXES = ( - "PD_", - "Paddle", - "PyInit_", - "paddle_", -) - -PROTECTED_MANGLED_CXX_PREFIXES = ( +PROTECTED_COMPAT_MANGLED_CXX_PREFIXES = ( "_ZN2at", + "_ZNK2at", "_ZN3c10", - "_ZN3phi", + "_ZNK3c10", "_ZN5torch", - "_ZN6paddle", + "_ZNK5torch", + "_ZN6caffe2", + "_ZNK6caffe2", ) @@ -176,13 +171,11 @@ def attach_demangled_names( def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: demangled = symbol.demangled_name - if demangled.startswith(PROTECTED_CXX_PREFIXES): + if demangled.startswith(PROTECTED_COMPAT_CXX_PREFIXES): return True raw_name = strip_elf_symbol_version(symbol.name) - return raw_name.startswith( - PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES - ) + return raw_name.startswith(PROTECTED_COMPAT_MANGLED_CXX_PREFIXES) def protected_symbols_by_name( diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py index 5053fd65f17057..dd78a7686fdb72 100644 --- a/tools/test_check_abi_compatibility.py +++ b/tools/test_check_abi_compatibility.py @@ -65,7 +65,7 @@ def test_ignores_weak_undefined_and_local_symbols(self): class TestProtectedSymbols(unittest.TestCase): - def test_detects_protected_cxx_namespaces(self): + def test_detects_protected_compat_cxx_namespaces(self): self.assertTrue( is_protected_paddle_abi_symbol( make_symbol( @@ -76,7 +76,7 @@ def test_detects_protected_cxx_namespaces(self): ) self.assertTrue( is_protected_paddle_abi_symbol( - make_symbol("_ZN3phi3barEv", "phi::bar()") + make_symbol("_ZN2at6Tensor3dimEv", "at::Tensor::dim()") ) ) self.assertTrue( @@ -84,12 +84,33 @@ def test_detects_protected_cxx_namespaces(self): make_symbol("_ZN5torch4cuda11synchronizeEv") ) ) - - def test_detects_relevant_c_and_python_entrypoints(self): self.assertTrue( + is_protected_paddle_abi_symbol( + make_symbol( + "_ZN6caffe28TypeMeta12toScalarTypeEv", + "caffe2::TypeMeta::toScalarType()", + ) + ) + ) + + def test_ignores_non_compat_paddle_entrypoints(self): + self.assertFalse( + is_protected_paddle_abi_symbol( + make_symbol( + "_ZN3phi12is_cpu_placeERKNS_5PlaceE", + "phi::is_cpu_place(phi::Place const&)", + ) + ) + ) + self.assertFalse( + is_protected_paddle_abi_symbol( + make_symbol("_ZN6paddle3fooEv", "paddle::foo()") + ) + ) + self.assertFalse( is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) ) - self.assertTrue( + self.assertFalse( is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) ) @@ -155,6 +176,20 @@ def test_removed_third_party_symbol_does_not_fail(self): self.assertEqual(issues, []) + def test_removed_non_compat_phi_symbol_does_not_fail(self): + base_symbols = [ + make_symbol( + "_ZN3phi12is_cpu_placeERKNS_5PlaceE", + "phi::is_cpu_place(phi::Place const&)", + ) + ] + + issues = compare_library_symbols( + "paddle/libs/libphi_core.so", base_symbols, [] + ) + + self.assertEqual(issues, []) + def test_missing_pr_library_fails_when_base_has_library(self): base_symbols = [ make_symbol( From c11df37a3dfc1a8c770082ab50d7da51900bfb5b Mon Sep 17 00:00:00 2001 From: youge325 Date: Fri, 1 May 2026 22:56:27 +0800 Subject: [PATCH 12/13] Revert "Restrict ABI check to compat symbols" This reverts commit 391d5f2c7f7e7fa07b88a425dc1b575a2fcda34d. --- tools/check_abi_compatibility.py | 27 ++++++++++------ tools/test_check_abi_compatibility.py | 45 +++------------------------ 2 files changed, 22 insertions(+), 50 deletions(-) diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py index 49712204976a11..d7bc3497595590 100644 --- a/tools/check_abi_compatibility.py +++ b/tools/check_abi_compatibility.py @@ -45,22 +45,27 @@ DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} -PROTECTED_COMPAT_CXX_PREFIXES = ( +PROTECTED_CXX_PREFIXES = ( + "phi::", + "paddle::", "c10::", "at::", "torch::", - "caffe2::", ) -PROTECTED_COMPAT_MANGLED_CXX_PREFIXES = ( +PROTECTED_C_SYMBOL_PREFIXES = ( + "PD_", + "Paddle", + "PyInit_", + "paddle_", +) + +PROTECTED_MANGLED_CXX_PREFIXES = ( "_ZN2at", - "_ZNK2at", "_ZN3c10", - "_ZNK3c10", + "_ZN3phi", "_ZN5torch", - "_ZNK5torch", - "_ZN6caffe2", - "_ZNK6caffe2", + "_ZN6paddle", ) @@ -171,11 +176,13 @@ def attach_demangled_names( def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: demangled = symbol.demangled_name - if demangled.startswith(PROTECTED_COMPAT_CXX_PREFIXES): + if demangled.startswith(PROTECTED_CXX_PREFIXES): return True raw_name = strip_elf_symbol_version(symbol.name) - return raw_name.startswith(PROTECTED_COMPAT_MANGLED_CXX_PREFIXES) + return raw_name.startswith( + PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES + ) def protected_symbols_by_name( diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py index dd78a7686fdb72..5053fd65f17057 100644 --- a/tools/test_check_abi_compatibility.py +++ b/tools/test_check_abi_compatibility.py @@ -65,7 +65,7 @@ def test_ignores_weak_undefined_and_local_symbols(self): class TestProtectedSymbols(unittest.TestCase): - def test_detects_protected_compat_cxx_namespaces(self): + def test_detects_protected_cxx_namespaces(self): self.assertTrue( is_protected_paddle_abi_symbol( make_symbol( @@ -76,7 +76,7 @@ def test_detects_protected_compat_cxx_namespaces(self): ) self.assertTrue( is_protected_paddle_abi_symbol( - make_symbol("_ZN2at6Tensor3dimEv", "at::Tensor::dim()") + make_symbol("_ZN3phi3barEv", "phi::bar()") ) ) self.assertTrue( @@ -84,33 +84,12 @@ def test_detects_protected_compat_cxx_namespaces(self): make_symbol("_ZN5torch4cuda11synchronizeEv") ) ) - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol( - "_ZN6caffe28TypeMeta12toScalarTypeEv", - "caffe2::TypeMeta::toScalarType()", - ) - ) - ) - def test_ignores_non_compat_paddle_entrypoints(self): - self.assertFalse( - is_protected_paddle_abi_symbol( - make_symbol( - "_ZN3phi12is_cpu_placeERKNS_5PlaceE", - "phi::is_cpu_place(phi::Place const&)", - ) - ) - ) - self.assertFalse( - is_protected_paddle_abi_symbol( - make_symbol("_ZN6paddle3fooEv", "paddle::foo()") - ) - ) - self.assertFalse( + def test_detects_relevant_c_and_python_entrypoints(self): + self.assertTrue( is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) ) - self.assertFalse( + self.assertTrue( is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) ) @@ -176,20 +155,6 @@ def test_removed_third_party_symbol_does_not_fail(self): self.assertEqual(issues, []) - def test_removed_non_compat_phi_symbol_does_not_fail(self): - base_symbols = [ - make_symbol( - "_ZN3phi12is_cpu_placeERKNS_5PlaceE", - "phi::is_cpu_place(phi::Place const&)", - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, [] - ) - - self.assertEqual(issues, []) - def test_missing_pr_library_fails_when_base_has_library(self): base_symbols = [ make_symbol( From 6d428d9fcf8688385cbceea3f421bc4bbf9845b2 Mon Sep 17 00:00:00 2001 From: youge325 Date: Fri, 1 May 2026 22:56:37 +0800 Subject: [PATCH 13/13] Revert "Add ABI symbol compatibility check" This reverts commit d0bcc46f49ce5979498a951b30de691dbeb037b7. --- ci/static_check.sh | 17 -- tools/check_abi_compatibility.py | 374 -------------------------- tools/test_check_abi_compatibility.py | 188 ------------- 3 files changed, 579 deletions(-) delete mode 100644 tools/check_abi_compatibility.py delete mode 100644 tools/test_check_abi_compatibility.py diff --git a/ci/static_check.sh b/ci/static_check.sh index 98cb60220995c5..9682a6ae48da47 100644 --- a/ci/static_check.sh +++ b/ci/static_check.sh @@ -149,21 +149,6 @@ function exec_samplecode_checking() { fi } -function exec_abi_compatibility_check() { - if [ "$(uname -s)" != "Linux" ]; then - echo "Skip ABI compatibility check on non-Linux platform." - return - fi - - python ${PADDLE_ROOT}/tools/check_abi_compatibility.py \ - --base-wheel "${PADDLE_ROOT}/build/dev_whl/*.whl" \ - --pr-wheel "${PADDLE_ROOT}/build/pr_whl/*.whl" - abi_check_error=$? - if [ "$abi_check_error" != "0" ]; then - exit $abi_check_error - fi -} - export PATH=/usr/local/python3.10.0/bin:/usr/local/python3.10.0/include:/usr/local/bin:${PATH} echo "export PATH=${PATH}" >> ~/.bashrc export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH @@ -173,8 +158,6 @@ ln -sf $(which python${PY_VERSION}) /usr/bin/python ln -sf $(which pip${PY_VERSION}) /usr/local/bin/pip mkdir -p /home/data/cfs/.ccache/static-check -exec_abi_compatibility_check - pip config set global.cache-dir "/home/data/cfs/.cache/pip" pip install --upgrade pip 1>nul pip install -r "${work_dir}/python/requirements.txt" 1>nul diff --git a/tools/check_abi_compatibility.py b/tools/check_abi_compatibility.py deleted file mode 100644 index d7bc3497595590..00000000000000 --- a/tools/check_abi_compatibility.py +++ /dev/null @@ -1,374 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Check Linux wheel ABI compatibility by comparing protected ELF symbols. - -The check is intentionally one-way: symbols added by a PR are allowed, while -protected symbols present in the base wheel must still exist in the PR wheel. -""" - -from __future__ import annotations - -import argparse -import glob -import os -import shutil -import subprocess -import sys -import tempfile -import zipfile -from dataclasses import dataclass -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Iterable - -WHEEL_LIBRARY_PATHS = ( - "paddle/base/libpaddle.so", - "paddle/libs/libphi.so", - "paddle/libs/libphi_core.so", - "paddle/libs/libphi_gpu.so", -) - -DEFINED_DYNAMIC_SYMBOL_TYPES = {"FUNC", "OBJECT"} - -PROTECTED_CXX_PREFIXES = ( - "phi::", - "paddle::", - "c10::", - "at::", - "torch::", -) - -PROTECTED_C_SYMBOL_PREFIXES = ( - "PD_", - "Paddle", - "PyInit_", - "paddle_", -) - -PROTECTED_MANGLED_CXX_PREFIXES = ( - "_ZN2at", - "_ZN3c10", - "_ZN3phi", - "_ZN5torch", - "_ZN6paddle", -) - - -@dataclass(frozen=True) -class DynamicSymbol: - name: str - symbol_type: str - bind: str - section: str - demangled_name: str - - -@dataclass(frozen=True) -class RemovedSymbol: - library: str - name: str - demangled_name: str - - -@dataclass(frozen=True) -class MissingLibrary: - library: str - - -def strip_elf_symbol_version(symbol_name: str) -> str: - if "@@" in symbol_name: - return symbol_name.split("@@", 1)[0] - if "@" in symbol_name: - return symbol_name.split("@", 1)[0] - return symbol_name - - -def parse_readelf_dynamic_symbols(readelf_output: str) -> list[DynamicSymbol]: - symbols = [] - for line in readelf_output.splitlines(): - fields = line.split() - if len(fields) < 8 or not fields[0].endswith(":"): - continue - symbol_type = fields[3] - bind = fields[4] - section = fields[6] - name = fields[7] - if ( - bind != "GLOBAL" - or section == "UND" - or symbol_type not in DEFINED_DYNAMIC_SYMBOL_TYPES - ): - continue - symbols.append( - DynamicSymbol( - name=name, - symbol_type=symbol_type, - bind=bind, - section=section, - demangled_name=strip_elf_symbol_version(name), - ) - ) - return symbols - - -def demangle_symbol_names(symbol_names: Iterable[str]) -> dict[str, str]: - unique_names = sorted( - {strip_elf_symbol_version(name) for name in symbol_names} - ) - if not unique_names: - return {} - cxxfilt = shutil.which("c++filt") - if cxxfilt is None: - return {name: name for name in unique_names} - - try: - result = subprocess.run( - [cxxfilt], - input="\n".join(unique_names), - text=True, - capture_output=True, - check=True, - ) - except (OSError, subprocess.CalledProcessError): - return {name: name for name in unique_names} - - demangled = result.stdout.splitlines() - if len(demangled) != len(unique_names): - return {name: name for name in unique_names} - return dict(zip(unique_names, demangled)) - - -def attach_demangled_names( - symbols: Iterable[DynamicSymbol], -) -> list[DynamicSymbol]: - symbol_list = list(symbols) - demangled_names = demangle_symbol_names( - symbol.name for symbol in symbol_list - ) - return [ - DynamicSymbol( - name=symbol.name, - symbol_type=symbol.symbol_type, - bind=symbol.bind, - section=symbol.section, - demangled_name=demangled_names.get( - strip_elf_symbol_version(symbol.name), symbol.demangled_name - ), - ) - for symbol in symbol_list - ] - - -def is_protected_paddle_abi_symbol(symbol: DynamicSymbol) -> bool: - demangled = symbol.demangled_name - if demangled.startswith(PROTECTED_CXX_PREFIXES): - return True - - raw_name = strip_elf_symbol_version(symbol.name) - return raw_name.startswith( - PROTECTED_C_SYMBOL_PREFIXES + PROTECTED_MANGLED_CXX_PREFIXES - ) - - -def protected_symbols_by_name( - symbols: Iterable[DynamicSymbol], -) -> dict[str, DynamicSymbol]: - return { - symbol.name: symbol - for symbol in symbols - if is_protected_paddle_abi_symbol(symbol) - } - - -def read_dynamic_symbols(library_path: str) -> list[DynamicSymbol]: - try: - result = subprocess.run( - ["readelf", "--dyn-syms", "-W", library_path], - text=True, - capture_output=True, - check=True, - ) - except FileNotFoundError as exc: - raise RuntimeError("readelf is required to check ABI symbols") from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - f"Failed to read dynamic symbols from {library_path}:\n{exc.stderr}" - ) from exc - - return attach_demangled_names(parse_readelf_dynamic_symbols(result.stdout)) - - -def extract_wheel_libraries( - wheel_path: str, library_paths: Iterable[str], output_dir: str -) -> dict[str, str]: - extracted_libraries = {} - with zipfile.ZipFile(wheel_path) as wheel: - wheel_entries = set(wheel.namelist()) - for library_path in library_paths: - if library_path not in wheel_entries: - continue - extracted_path = wheel.extract(library_path, output_dir) - extracted_libraries[library_path] = extracted_path - return extracted_libraries - - -def compare_library_symbols( - library: str, - base_symbols: Iterable[DynamicSymbol] | None, - pr_symbols: Iterable[DynamicSymbol] | None, -) -> list[RemovedSymbol | MissingLibrary]: - if base_symbols is None: - return [] - if pr_symbols is None: - return [MissingLibrary(library=library)] - - base_protected_symbols = protected_symbols_by_name(base_symbols) - pr_protected_symbols = protected_symbols_by_name(pr_symbols) - removed_names = sorted( - set(base_protected_symbols) - set(pr_protected_symbols) - ) - return [ - RemovedSymbol( - library=library, - name=name, - demangled_name=base_protected_symbols[name].demangled_name, - ) - for name in removed_names - ] - - -def resolve_wheel_path(pattern: str, label: str) -> str: - matches = sorted(glob.glob(pattern)) - if len(matches) != 1: - raise RuntimeError( - f"Expected exactly one {label} wheel matching {pattern}, " - f"but found {len(matches)}: {matches}" - ) - return matches[0] - - -def compare_wheel_abi( - base_wheel: str, pr_wheel: str, library_paths: Iterable[str] -) -> list[RemovedSymbol | MissingLibrary]: - with tempfile.TemporaryDirectory(prefix="paddle_abi_check_") as temp_dir: - base_dir = os.path.join(temp_dir, "base") - pr_dir = os.path.join(temp_dir, "pr") - base_libraries = extract_wheel_libraries( - base_wheel, library_paths, base_dir - ) - pr_libraries = extract_wheel_libraries(pr_wheel, library_paths, pr_dir) - - issues: list[RemovedSymbol | MissingLibrary] = [] - for library in library_paths: - base_path = base_libraries.get(library) - pr_path = pr_libraries.get(library) - base_symbols = ( - read_dynamic_symbols(base_path) - if base_path is not None - else None - ) - pr_symbols = ( - read_dynamic_symbols(pr_path) if pr_path is not None else None - ) - issues.extend( - compare_library_symbols(library, base_symbols, pr_symbols) - ) - return issues - - -def format_issues( - issues: Iterable[RemovedSymbol | MissingLibrary], max_report: int -) -> str: - issue_list = list(issues) - lines = [ - "ABI compatibility check failed.", - "The PR wheel removed protected dynamic symbols that exist in the base " - "wheel. Removing these symbols can break downstream wheels or shared " - "libraries compiled against the base branch.", - "", - ] - for issue in issue_list[:max_report]: - if isinstance(issue, MissingLibrary): - lines.extend( - [ - f"Library: {issue.library}", - " PR wheel is missing this library, but the base wheel " - "contains it.", - "", - ] - ) - else: - lines.extend( - [ - f"Library: {issue.library}", - f" Raw symbol: {issue.name}", - f" Demangled: {issue.demangled_name}", - "", - ] - ) - - omitted_count = len(issue_list) - max_report - if omitted_count > 0: - lines.append(f"... omitted {omitted_count} additional removed symbols.") - return "\n".join(lines) - - -def parse_args(argv: list[str]) -> argparse.Namespace: - paddle_root = os.environ.get("PADDLE_ROOT", os.getcwd()) - parser = argparse.ArgumentParser( - description="Check Linux wheel ABI compatibility for Paddle symbols." - ) - parser.add_argument( - "--base-wheel", - default=os.path.join(paddle_root, "build/dev_whl/*.whl"), - help="Base branch wheel path or glob pattern.", - ) - parser.add_argument( - "--pr-wheel", - default=os.path.join(paddle_root, "build/pr_whl/*.whl"), - help="PR wheel path or glob pattern.", - ) - parser.add_argument( - "--max-report", - type=int, - default=200, - help="Maximum number of ABI issues to print.", - ) - return parser.parse_args(argv) - - -def main(argv: list[str] | None = None) -> int: - args = parse_args(sys.argv[1:] if argv is None else argv) - try: - base_wheel = resolve_wheel_path(args.base_wheel, "base") - pr_wheel = resolve_wheel_path(args.pr_wheel, "PR") - issues = compare_wheel_abi(base_wheel, pr_wheel, WHEEL_LIBRARY_PATHS) - except RuntimeError as exc: - print(f"ABI compatibility check failed: {exc}", file=sys.stderr) - return 1 - - if issues: - print(format_issues(issues, args.max_report), file=sys.stderr) - return 1 - - print("ABI compatibility check passed.") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tools/test_check_abi_compatibility.py b/tools/test_check_abi_compatibility.py deleted file mode 100644 index 5053fd65f17057..00000000000000 --- a/tools/test_check_abi_compatibility.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -try: - from check_abi_compatibility import ( - DynamicSymbol, - MissingLibrary, - RemovedSymbol, - compare_library_symbols, - is_protected_paddle_abi_symbol, - parse_readelf_dynamic_symbols, - ) -except ModuleNotFoundError: - from tools.check_abi_compatibility import ( - DynamicSymbol, - MissingLibrary, - RemovedSymbol, - compare_library_symbols, - is_protected_paddle_abi_symbol, - parse_readelf_dynamic_symbols, - ) - - -def make_symbol(name, demangled_name=None, bind="GLOBAL", section="12"): - return DynamicSymbol( - name=name, - symbol_type="FUNC", - bind=bind, - section=section, - demangled_name=demangled_name or name, - ) - - -class TestParseReadelfDynamicSymbols(unittest.TestCase): - def test_ignores_weak_undefined_and_local_symbols(self): - readelf_output = """ -Symbol table '.dynsym' contains 5 entries: - Num: Value Size Type Bind Vis Ndx Name - 1: 0000000000001000 42 FUNC GLOBAL DEFAULT 12 _ZN3c1017get_default_dtypeEv - 2: 0000000000001010 42 FUNC WEAK DEFAULT 12 _ZN3c104weakEv - 3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZN3c107missingEv - 4: 0000000000001020 42 FUNC LOCAL DEFAULT 12 _ZN3c105localEv - 5: 0000000000001030 8 OBJECT GLOBAL DEFAULT 13 _ZN3phi3barE -""" - symbols = parse_readelf_dynamic_symbols(readelf_output) - self.assertEqual( - [symbol.name for symbol in symbols], - ["_ZN3c1017get_default_dtypeEv", "_ZN3phi3barE"], - ) - - -class TestProtectedSymbols(unittest.TestCase): - def test_detects_protected_cxx_namespaces(self): - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol( - "_ZN3c1017get_default_dtypeEv", - "c10::get_default_dtype()", - ) - ) - ) - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol("_ZN3phi3barEv", "phi::bar()") - ) - ) - self.assertTrue( - is_protected_paddle_abi_symbol( - make_symbol("_ZN5torch4cuda11synchronizeEv") - ) - ) - - def test_detects_relevant_c_and_python_entrypoints(self): - self.assertTrue( - is_protected_paddle_abi_symbol(make_symbol("PyInit_libpaddle")) - ) - self.assertTrue( - is_protected_paddle_abi_symbol(make_symbol("PD_ConfigCreate")) - ) - - def test_ignores_third_party_symbols(self): - self.assertFalse( - is_protected_paddle_abi_symbol(make_symbol("XXH32", "XXH32")) - ) - self.assertFalse( - is_protected_paddle_abi_symbol( - make_symbol("_ZN4YAML7EmitterC1Ev", "YAML::Emitter::Emitter()") - ) - ) - - -class TestCompareLibrarySymbols(unittest.TestCase): - def test_added_symbols_do_not_fail(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - pr_symbols = [ - *base_symbols, - make_symbol( - "_ZN3c1017set_default_dtypeEv", "c10::set_default_dtype()" - ), - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, pr_symbols - ) - - self.assertEqual(issues, []) - - def test_removed_protected_symbol_fails(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, [] - ) - - self.assertEqual( - issues, - [ - RemovedSymbol( - library="paddle/libs/libphi_core.so", - name="_ZN3c1017get_default_dtypeEv", - demangled_name="c10::get_default_dtype()", - ) - ], - ) - - def test_removed_third_party_symbol_does_not_fail(self): - base_symbols = [make_symbol("XXH32", "XXH32")] - - issues = compare_library_symbols( - "paddle/base/libpaddle.so", base_symbols, [] - ) - - self.assertEqual(issues, []) - - def test_missing_pr_library_fails_when_base_has_library(self): - base_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", base_symbols, None - ) - - self.assertEqual( - issues, [MissingLibrary(library="paddle/libs/libphi_core.so")] - ) - - def test_missing_base_library_does_not_fail(self): - pr_symbols = [ - make_symbol( - "_ZN3c1017get_default_dtypeEv", "c10::get_default_dtype()" - ) - ] - - issues = compare_library_symbols( - "paddle/libs/libphi_core.so", None, pr_symbols - ) - - self.assertEqual(issues, []) - - -if __name__ == "__main__": - unittest.main()