From 7b92719180fc88f4dd633607d521db579ff0f4ad Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Mon, 11 May 2026 19:54:30 -0400 Subject: [PATCH 01/97] adds wheels --- projects/hipdnn/CMakeLists.txt | 7 ++ projects/hipdnn/python/CMakeLists.txt | 57 ++++++++- projects/hipdnn/python/pyproject.toml | 8 +- .../tools/dnn-benchmarking/CMakeLists.txt | 23 ++++ .../tools/dnn-benchmarking/pyproject.toml | 11 +- .../src/dnn_benchmarking/graphs/__init__.py | 9 ++ .../dnn_benchmarking/graphs/sample_add.json | 48 ++++++++ .../graphs/sample_batchnorm.json | 73 ++++++++++++ .../graphs/sample_conv_fwd.json | 53 +++++++++ .../graphs/sample_matmul.json | 46 ++++++++ .../dnn_benchmarking/graphs/sample_relu.json | 43 +++++++ .../tools/dnn-benchmarking/tests/conftest.py | 9 +- .../tests/integration/test_execution.py | 37 +++--- test/therock/test_dnn_benchmarking.py | 108 ++++++++++++++++++ 14 files changed, 503 insertions(+), 29 deletions(-) create mode 100644 projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json create mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json create mode 100644 test/therock/test_dnn_benchmarking.py diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index aca3eca02c3..63f7202e1a3 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -153,6 +153,8 @@ if(DEFINED HIP_DNN_SKIP_TESTS) endif() option(HIPDNN_SKIP_TESTS "Skips building all tests" OFF) +option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" OFF) + option(HIPDNN_ENABLE_COVERAGE "Build with code coverage flags" OFF) option(BUILD_ADDRESS_SANITIZER "Build with Address Sanitizer enabled" OFF) option(BUILD_THREAD_SANITIZER "Build with Thread Sanitizer enabled" OFF) @@ -245,6 +247,11 @@ add_subdirectory(plugin_sdk) add_subdirectory(tests) add_subdirectory(tools) +if(HIPDNN_BUILD_PYTHON_BINDINGS) + add_subdirectory(python) + add_subdirectory(tools/dnn-benchmarking) +endif() + if(NOT HIPDNN_SKIP_TESTS) # Keep this after all build folders have been added so they have a chance to register their tests # using add_*_test_target() diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index a0acea6a3d9..257ddc2c5e4 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -25,8 +25,14 @@ set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include # Find HIP (if needed) find_package(hip REQUIRED) -find_package(hipdnn_frontend CONFIG REQUIRED) -find_package(hipdnn_backend REQUIRED) + +# Only find_package for hipDNN targets when built standalone +if(NOT TARGET hipdnn_frontend) + find_package(hipdnn_frontend CONFIG REQUIRED) +endif() +if(NOT TARGET hipdnn_backend) + find_package(hipdnn_backend REQUIRED) +endif() # Create the Python module nanobind_add_module( @@ -51,5 +57,48 @@ set_target_properties(hipdnn_frontend_python PROPERTIES INSTALL_RPATH_USE_LINK_P # actual library structure target_link_libraries(hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_backend hip::host) -# Install the module - scikit-build-core handles the destination -install(TARGETS hipdnn_frontend_python LIBRARY DESTINATION .) +# ============================================================================ +# Installation +# ============================================================================ + +# Use a relative path so the install respects CMAKE_INSTALL_PREFIX. +set(HIPDNN_PYTHON_SITE_DIR + "lib/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/dist-packages" + CACHE PATH "Python module install directory (relative to CMAKE_INSTALL_PREFIX)") + +# scikit-build-core (pip wheel / pip install) install rules. +# SKBUILD is set by scikit-build-core when building a wheel. +if(DEFINED SKBUILD) + install(TARGETS hipdnn_frontend_python DESTINATION hipdnn_frontend) + install(FILES hipdnn_frontend/__init__.py DESTINATION hipdnn_frontend) +else() + # Regular cmake install — place in dist-packages under CMAKE_INSTALL_PREFIX. + install(TARGETS hipdnn_frontend_python + LIBRARY DESTINATION ${HIPDNN_PYTHON_SITE_DIR}/hipdnn_frontend + ) + install(FILES hipdnn_frontend/__init__.py + DESTINATION ${HIPDNN_PYTHON_SITE_DIR}/hipdnn_frontend + ) +endif() + +# Build wheels and stage them in the install prefix. +# Skipped when scikit-build-core drives cmake (SKBUILD) to avoid recursion. +if(NOT DEFINED SKBUILD) + install(CODE " + set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") + file(MAKE_DIRECTORY \"\${_wheel_dir}\") + + message(STATUS \"Building hipdnn-frontend wheel\") + execute_process( + COMMAND \"${Python_EXECUTABLE}\" -m pip wheel --no-deps + -w \"\${_wheel_dir}\" + \"${CMAKE_CURRENT_SOURCE_DIR}\" + -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" + RESULT_VARIABLE _result + ) + if(_result) + message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") + endif() + + ") +endif() diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 04b7a351e2a..e3fd4537dd7 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -51,10 +51,9 @@ build-dir = "build" cmake.minimum-version = "3.18" # Build type cmake.build-type = "Release" -# Where the Python module will be installed -wheel.install-dir = "hipdnn_frontend" -# Package data -wheel.packages = ["hipdnn_frontend"] +# No pure-Python packages to scan; the wheel contents come entirely from +# cmake install() rules (hipdnn_frontend_python.so + __init__.py → hipdnn_frontend/). +wheel.packages = [] [tool.scikit-build.cmake.define] # Pass any additional CMake definitions if needed @@ -62,7 +61,6 @@ CMAKE_PREFIX_PATH = "${CMAKE_PREFIX_PATH}" CMAKE_C_COMPILER = "/opt/rocm/llvm/bin/clang" CMAKE_CXX_COMPILER = "/opt/rocm/llvm/bin/clang++" - hip_DIR = "${hip_DIR}" hipdnn_frontend_DIR = "${hipdnn_frontend_DIR}" hipdnn_backend_DIR = "${hipdnn_backend_DIR}" diff --git a/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt b/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt new file mode 100644 index 00000000000..71125c9e181 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +cmake_minimum_required(VERSION 3.18) + +find_package(Python COMPONENTS Interpreter REQUIRED) + +# Build dnn-benchmarking wheel and install to staging area +install(CODE " + set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") + file(MAKE_DIRECTORY \"\${_wheel_dir}\") + + message(STATUS \"Building dnn-benchmarking wheel\") + execute_process( + COMMAND \"${Python_EXECUTABLE}\" -m pip wheel --no-deps + -w \"\${_wheel_dir}\" + \"${CMAKE_CURRENT_SOURCE_DIR}\" + RESULT_VARIABLE _result + ) + if(_result) + message(FATAL_ERROR \"Failed to build dnn-benchmarking wheel\") + endif() +") diff --git a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml index 9583de29449..b2982341b30 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml +++ b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml @@ -20,11 +20,13 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", ] -# torch is intentionally not listed here — it must come from the ROCm or CUDA -# nightly index (see requirements-{rocm,cuda}.txt) which pip cannot express -# in pyproject.toml. Everything else resolves cleanly from PyPI. dependencies = [ "numpy>=1.19.0", + "hipdnn-frontend", +] + +[project.optional-dependencies] +test = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", ] @@ -40,6 +42,9 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = ["src"] +[tool.setuptools.package-data] +"dnn_benchmarking.graphs" = ["*.json"] + [tool.pytest.ini_options] testpaths = ["tests"] markers = [ diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py new file mode 100644 index 00000000000..cec7234b791 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py @@ -0,0 +1,9 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +from importlib.resources import files + + +def sample_graphs_path() -> str: + """Return the path to the bundled sample graphs directory.""" + return str(files("dnn_benchmarking.graphs")) diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json new file mode 100644 index 00000000000..289c2c2b127 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json @@ -0,0 +1,48 @@ +{ + "name": "sample_pointwise_add_128x256x14x14", + "compute_data_type": "float", + "io_data_type": "float", + "intermediate_data_type": "float", + "tensors": [ + { + "uid": 1, + "name": "input_x", + "dims": [128, 256, 14, 14], + "strides": [50176, 196, 14, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 2, + "name": "input_y", + "dims": [128, 256, 14, 14], + "strides": [50176, 196, 14, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 3, + "name": "output_z", + "dims": [128, 256, 14, 14], + "strides": [50176, 196, 14, 1], + "data_type": "float", + "virtual": false + } + ], + "nodes": [ + { + "name": "add_node", + "type": "PointwiseAttributes", + "compute_data_type": "float", + "inputs": { + "operation": "add", + "in_0_tensor_uid": 1, + "in_1_tensor_uid": 2, + "in_2_tensor_uid": null + }, + "outputs": { + "out_0_tensor_uid": 3 + } + } + ] +} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json new file mode 100644 index 00000000000..4d35dc005f8 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json @@ -0,0 +1,73 @@ +{ + "name": "sample_batchnorm_inference_32x64x28x28", + "compute_data_type": "float", + "io_data_type": "float", + "intermediate_data_type": "float", + "tensors": [ + { + "uid": 1, + "name": "input_x", + "dims": [32, 64, 28, 28], + "strides": [50176, 784, 28, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 2, + "name": "mean", + "dims": [1, 64, 1, 1], + "strides": [64, 1, 1, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 3, + "name": "inv_variance", + "dims": [1, 64, 1, 1], + "strides": [64, 1, 1, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 4, + "name": "scale", + "dims": [1, 64, 1, 1], + "strides": [64, 1, 1, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 5, + "name": "bias", + "dims": [1, 64, 1, 1], + "strides": [64, 1, 1, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 6, + "name": "output_y", + "dims": [32, 64, 28, 28], + "strides": [50176, 784, 28, 1], + "data_type": "float", + "virtual": false + } + ], + "nodes": [ + { + "name": "batchnorm_inference_node", + "type": "BatchnormInferenceAttributes", + "compute_data_type": "float", + "inputs": { + "x_tensor_uid": 1, + "mean_tensor_uid": 2, + "inv_variance_tensor_uid": 3, + "scale_tensor_uid": 4, + "bias_tensor_uid": 5 + }, + "outputs": { + "y_tensor_uid": 6 + } + } + ] +} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json new file mode 100644 index 00000000000..c9e476af8bc --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json @@ -0,0 +1,53 @@ +{ + "name": "sample_conv_fwd_16x16x16x16_k16_3x3", + "compute_data_type": "float", + "io_data_type": "float", + "intermediate_data_type": "float", + "tensors": [ + { + "uid": 0, + "name": "output_y", + "dims": [16, 16, 16, 16], + "strides": [4096, 256, 16, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 1, + "name": "input_x", + "dims": [16, 16, 16, 16], + "strides": [4096, 256, 16, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 2, + "name": "weight", + "dims": [16, 16, 3, 3], + "strides": [144, 9, 3, 1], + "data_type": "float", + "virtual": false + } + ], + "nodes": [ + { + "name": "conv_fprop_node", + "type": "ConvolutionFwdAttributes", + "compute_data_type": "float", + "inputs": { + "x_tensor_uid": 1, + "w_tensor_uid": 2 + }, + "outputs": { + "y_tensor_uid": 0 + }, + "parameters": { + "conv_mode": "CROSS_CORRELATION", + "pre_padding": [1, 1], + "post_padding": [1, 1], + "stride": [1, 1], + "dilation": [1, 1] + } + } + ] +} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json new file mode 100644 index 00000000000..d1c73413886 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json @@ -0,0 +1,46 @@ +{ + "name": "sample_matmul_256x512x1024", + "compute_data_type": "float", + "io_data_type": "float", + "intermediate_data_type": "float", + "tensors": [ + { + "uid": 1, + "name": "input_a", + "dims": [256, 512], + "strides": [512, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 2, + "name": "input_b", + "dims": [512, 1024], + "strides": [1024, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 3, + "name": "output_c", + "dims": [256, 1024], + "strides": [1024, 1], + "data_type": "float", + "virtual": false + } + ], + "nodes": [ + { + "name": "matmul_node", + "type": "MatmulAttributes", + "compute_data_type": "float", + "inputs": { + "a_tensor_uid": 1, + "b_tensor_uid": 2 + }, + "outputs": { + "c_tensor_uid": 3 + } + } + ] +} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json new file mode 100644 index 00000000000..cd58e0660b1 --- /dev/null +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json @@ -0,0 +1,43 @@ +{ + "name": "sample_relu_activation_64x128x56x56", + "compute_data_type": "float", + "io_data_type": "float", + "intermediate_data_type": "float", + "tensors": [ + { + "uid": 1, + "name": "input_x", + "dims": [64, 128, 56, 56], + "strides": [401408, 3136, 56, 1], + "data_type": "float", + "virtual": false + }, + { + "uid": 2, + "name": "output_y", + "dims": [64, 128, 56, 56], + "strides": [401408, 3136, 56, 1], + "data_type": "float", + "virtual": false + } + ], + "nodes": [ + { + "name": "relu_node", + "type": "PointwiseAttributes", + "compute_data_type": "float", + "inputs": { + "operation": "relu_fwd", + "relu_lower_clip": 0.0, + "relu_upper_clip": 6.0, + "relu_lower_clip_slope": 0.0, + "in_0_tensor_uid": 1, + "in_1_tensor_uid": null, + "in_2_tensor_uid": null + }, + "outputs": { + "out_0_tensor_uid": 2 + } + } + ] +} diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py b/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py index 31e673b89be..71b73cd5277 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py @@ -167,9 +167,16 @@ def skip_if_no_gpu(): def _find_plugin_path() -> str: """Find the hipDNN engine plugin directory. - Searches worktree build dir and standard install locations. + Checks DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH env var first, then + searches worktree build dir and standard install locations. Returns the path as a string, or None if not found. """ + import os + + env_path = os.environ.get("DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH") + if env_path and Path(env_path).is_dir(): + return env_path + project_root = Path(__file__).parent.parent candidates = [ # Worktree/superbuild: relative to dnn-benchmarking tool diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py b/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py index 5907203885b..8322c852f21 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py @@ -27,24 +27,29 @@ def _setup_hipdnn(): pytest.skip(f"PyTorch not available: {e}") try: + import os + import hipdnn_frontend - # Auto-discover and set plugin path - project_root = Path(__file__).parent.parent.parent - candidates = [ - project_root.parent.parent.parent.parent - / "dnn-providers" - / "miopen-provider" - / "build" - / "lib" - / "hipdnn_plugins" - / "engines", - Path("/opt/rocm/lib/hipdnn_plugins/engines"), - ] - for p in candidates: - if p.is_dir() and any(p.glob("*.so")): - hipdnn_frontend.set_engine_plugin_paths([str(p)]) - break + env_path = os.environ.get("DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH") + if env_path and Path(env_path).is_dir(): + hipdnn_frontend.set_engine_plugin_paths([env_path]) + else: + project_root = Path(__file__).parent.parent.parent + candidates = [ + project_root.parent.parent.parent.parent + / "dnn-providers" + / "miopen-provider" + / "build" + / "lib" + / "hipdnn_plugins" + / "engines", + Path("/opt/rocm/lib/hipdnn_plugins/engines"), + ] + for p in candidates: + if p.is_dir() and any(p.glob("*.so")): + hipdnn_frontend.set_engine_plugin_paths([str(p)]) + break hipdnn_frontend.Handle() return hipdnn_frontend diff --git a/test/therock/test_dnn_benchmarking.py b/test/therock/test_dnn_benchmarking.py new file mode 100644 index 00000000000..f9d977c2988 --- /dev/null +++ b/test/therock/test_dnn_benchmarking.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +"""TheRock test runner for dnn-benchmarking. + +Installs the dnn-benchmarking wheel into the active venv, configures the +ROCm environment from the unpacked artifact tree, and runs a curated +pytest target. + +Environment variables used: + THEROCK_BIN_DIR: Root of the unpacked artifact tree (contains lib/, bin/, etc.) + THEROCK_DIR: Repository root (fallback: three directories above this script) +""" + +import logging +import os +import platform +import shlex +import subprocess +import sys +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = Path( + os.environ.get("THEROCK_DIR") or SCRIPT_DIR.parent.parent.parent +).resolve() + +logging.basicConfig(level=logging.INFO) + +if not THEROCK_BIN_DIR: + logging.error("THEROCK_BIN_DIR is not set") + sys.exit(1) + +artifact_root = Path(THEROCK_BIN_DIR).resolve().parent +bundle = artifact_root / "share" / "hipdnn" / "dnn-benchmarking" +wheelhouse = bundle / "wheels" +tests_dir = bundle / "tests" + +if not bundle.exists(): + logging.error(f"dnn-benchmarking test bundle not found: {bundle}") + sys.exit(1) +if not tests_dir.exists(): + logging.error(f"dnn-benchmarking tests not found: {tests_dir}") + sys.exit(1) + +env = os.environ.copy() +env["ROCM_PATH"] = str(artifact_root) +env["HIP_PLATFORM"] = "amd" + +workspace = Path(env.get("RUNNER_TEMP", str(artifact_root / "tmp"))) / "dnn-benchmarking" +workspace.mkdir(parents=True, exist_ok=True) +env["DNN_BENCH_WORKSPACE"] = str(workspace) +env["PYTHONPYCACHEPREFIX"] = str(workspace / "pycache") +env["XDG_CACHE_HOME"] = str(workspace / "cache") +env["MIOPEN_USER_DB_PATH"] = str(workspace / "miopen_cache") +env["MIOPEN_CUSTOM_CACHE_DIR"] = str(workspace / "miopen_cache") +env["AMD_COMGR_CACHE_DIR"] = str(workspace / "comgr_cache") + +plugin_dir = artifact_root / "lib" / "hipdnn_plugins" / "engines" +if plugin_dir.exists(): + env["DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH"] = str(plugin_dir) + +if platform.system() == "Windows": + env["PATH"] = os.pathsep.join( + [str(artifact_root / "bin"), str(artifact_root / "lib"), env.get("PATH", "")] + ) +else: + env["PATH"] = os.pathsep.join( + [str(artifact_root / "bin"), env.get("PATH", "")] + ) + env["LD_LIBRARY_PATH"] = os.pathsep.join( + [ + str(artifact_root / "lib"), + str(artifact_root / "lib" / "llvm" / "lib"), + env.get("LD_LIBRARY_PATH", ""), + ] + ) + +# Install dnn-benchmarking (and hipdnn-frontend if bundled) from the wheelhouse. +install_cmd = [ + sys.executable, "-m", "pip", "install", + "--no-index", "--find-links", str(wheelhouse), + "dnn-benchmarking[test]", +] +if any(wheelhouse.glob("hipdnn_frontend-*.whl")): + install_cmd.append("hipdnn-frontend") + +logging.info(f"++ Install: {shlex.join(install_cmd)}") +subprocess.run(install_cmd, check=True, env=env) + +# Run the no-torch smoke suite: CLI parsing, graph loading, config, reporting, +# shape conversion, and non-GPU tests. These don't require torch or a GPU. +pytest_cmd = [ + sys.executable, "-m", "pytest", "-q", + str(tests_dir / "unit" / "cli"), + str(tests_dir / "unit" / "config"), + str(tests_dir / "unit" / "graph"), + str(tests_dir / "unit" / "reporting"), + str(tests_dir / "unit" / "tools"), + str(tests_dir / "unit" / "validation" / "test_comparison.py"), + str(tests_dir / "integration" / "test_graph_loading.py"), +] + +logging.info(f"++ Test: {shlex.join(pytest_cmd)}") +result = subprocess.run(pytest_cmd, env=env, cwd=str(bundle)) +sys.exit(result.returncode) From 9ad3b63095833183e1e4753cc3a733f54eec55cb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 10:49:14 -0400 Subject: [PATCH 02/97] removes the duplicate graphs --- .../tools/dnn-benchmarking/pyproject.toml | 3 - .../src/dnn_benchmarking/graphs/__init__.py | 9 --- .../dnn_benchmarking/graphs/sample_add.json | 48 ------------ .../graphs/sample_batchnorm.json | 73 ------------------- .../graphs/sample_conv_fwd.json | 53 -------------- .../graphs/sample_matmul.json | 46 ------------ .../dnn_benchmarking/graphs/sample_relu.json | 43 ----------- 7 files changed, 275 deletions(-) delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json diff --git a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml index b2982341b30..788a7a80422 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml +++ b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml @@ -42,9 +42,6 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = ["src"] -[tool.setuptools.package-data] -"dnn_benchmarking.graphs" = ["*.json"] - [tool.pytest.ini_options] testpaths = ["tests"] markers = [ diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py deleted file mode 100644 index cec7234b791..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -from importlib.resources import files - - -def sample_graphs_path() -> str: - """Return the path to the bundled sample graphs directory.""" - return str(files("dnn_benchmarking.graphs")) diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json deleted file mode 100644 index 289c2c2b127..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_add.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "name": "sample_pointwise_add_128x256x14x14", - "compute_data_type": "float", - "io_data_type": "float", - "intermediate_data_type": "float", - "tensors": [ - { - "uid": 1, - "name": "input_x", - "dims": [128, 256, 14, 14], - "strides": [50176, 196, 14, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 2, - "name": "input_y", - "dims": [128, 256, 14, 14], - "strides": [50176, 196, 14, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 3, - "name": "output_z", - "dims": [128, 256, 14, 14], - "strides": [50176, 196, 14, 1], - "data_type": "float", - "virtual": false - } - ], - "nodes": [ - { - "name": "add_node", - "type": "PointwiseAttributes", - "compute_data_type": "float", - "inputs": { - "operation": "add", - "in_0_tensor_uid": 1, - "in_1_tensor_uid": 2, - "in_2_tensor_uid": null - }, - "outputs": { - "out_0_tensor_uid": 3 - } - } - ] -} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json deleted file mode 100644 index 4d35dc005f8..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_batchnorm.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "name": "sample_batchnorm_inference_32x64x28x28", - "compute_data_type": "float", - "io_data_type": "float", - "intermediate_data_type": "float", - "tensors": [ - { - "uid": 1, - "name": "input_x", - "dims": [32, 64, 28, 28], - "strides": [50176, 784, 28, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 2, - "name": "mean", - "dims": [1, 64, 1, 1], - "strides": [64, 1, 1, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 3, - "name": "inv_variance", - "dims": [1, 64, 1, 1], - "strides": [64, 1, 1, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 4, - "name": "scale", - "dims": [1, 64, 1, 1], - "strides": [64, 1, 1, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 5, - "name": "bias", - "dims": [1, 64, 1, 1], - "strides": [64, 1, 1, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 6, - "name": "output_y", - "dims": [32, 64, 28, 28], - "strides": [50176, 784, 28, 1], - "data_type": "float", - "virtual": false - } - ], - "nodes": [ - { - "name": "batchnorm_inference_node", - "type": "BatchnormInferenceAttributes", - "compute_data_type": "float", - "inputs": { - "x_tensor_uid": 1, - "mean_tensor_uid": 2, - "inv_variance_tensor_uid": 3, - "scale_tensor_uid": 4, - "bias_tensor_uid": 5 - }, - "outputs": { - "y_tensor_uid": 6 - } - } - ] -} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json deleted file mode 100644 index c9e476af8bc..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_conv_fwd.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "name": "sample_conv_fwd_16x16x16x16_k16_3x3", - "compute_data_type": "float", - "io_data_type": "float", - "intermediate_data_type": "float", - "tensors": [ - { - "uid": 0, - "name": "output_y", - "dims": [16, 16, 16, 16], - "strides": [4096, 256, 16, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 1, - "name": "input_x", - "dims": [16, 16, 16, 16], - "strides": [4096, 256, 16, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 2, - "name": "weight", - "dims": [16, 16, 3, 3], - "strides": [144, 9, 3, 1], - "data_type": "float", - "virtual": false - } - ], - "nodes": [ - { - "name": "conv_fprop_node", - "type": "ConvolutionFwdAttributes", - "compute_data_type": "float", - "inputs": { - "x_tensor_uid": 1, - "w_tensor_uid": 2 - }, - "outputs": { - "y_tensor_uid": 0 - }, - "parameters": { - "conv_mode": "CROSS_CORRELATION", - "pre_padding": [1, 1], - "post_padding": [1, 1], - "stride": [1, 1], - "dilation": [1, 1] - } - } - ] -} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json deleted file mode 100644 index d1c73413886..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_matmul.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "sample_matmul_256x512x1024", - "compute_data_type": "float", - "io_data_type": "float", - "intermediate_data_type": "float", - "tensors": [ - { - "uid": 1, - "name": "input_a", - "dims": [256, 512], - "strides": [512, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 2, - "name": "input_b", - "dims": [512, 1024], - "strides": [1024, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 3, - "name": "output_c", - "dims": [256, 1024], - "strides": [1024, 1], - "data_type": "float", - "virtual": false - } - ], - "nodes": [ - { - "name": "matmul_node", - "type": "MatmulAttributes", - "compute_data_type": "float", - "inputs": { - "a_tensor_uid": 1, - "b_tensor_uid": 2 - }, - "outputs": { - "c_tensor_uid": 3 - } - } - ] -} diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json deleted file mode 100644 index cd58e0660b1..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graphs/sample_relu.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "name": "sample_relu_activation_64x128x56x56", - "compute_data_type": "float", - "io_data_type": "float", - "intermediate_data_type": "float", - "tensors": [ - { - "uid": 1, - "name": "input_x", - "dims": [64, 128, 56, 56], - "strides": [401408, 3136, 56, 1], - "data_type": "float", - "virtual": false - }, - { - "uid": 2, - "name": "output_y", - "dims": [64, 128, 56, 56], - "strides": [401408, 3136, 56, 1], - "data_type": "float", - "virtual": false - } - ], - "nodes": [ - { - "name": "relu_node", - "type": "PointwiseAttributes", - "compute_data_type": "float", - "inputs": { - "operation": "relu_fwd", - "relu_lower_clip": 0.0, - "relu_upper_clip": 6.0, - "relu_lower_clip_slope": 0.0, - "in_0_tensor_uid": 1, - "in_1_tensor_uid": null, - "in_2_tensor_uid": null - }, - "outputs": { - "out_0_tensor_uid": 2 - } - } - ] -} From 06f59e5f70be78e55712b96019ba809e7bf90043 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 11:37:20 -0400 Subject: [PATCH 03/97] Adopt SKBUILD pattern for hipdnn-frontend wheel and revert dnn-benchmarking changes Rewrite python/CMakeLists.txt to use scikit-build-core's SKBUILD variable for wheel-specific install rules (following the stinkytofu pattern) instead of unconditional dist-packages install. Regular cmake --install stages the wheel via pip wheel in an else() branch, avoiding recursion. Revert all dnn-benchmarking packaging changes to keep this branch focused on hipdnn-frontend Python bindings only. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/CMakeLists.txt | 1 - projects/hipdnn/python/CMakeLists.txt | 20 +--- .../tools/dnn-benchmarking/CMakeLists.txt | 23 ---- .../tools/dnn-benchmarking/pyproject.toml | 8 +- .../tools/dnn-benchmarking/tests/conftest.py | 9 +- .../tests/integration/test_execution.py | 37 +++--- test/therock/test_dnn_benchmarking.py | 108 ------------------ 7 files changed, 21 insertions(+), 185 deletions(-) delete mode 100644 projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt delete mode 100644 test/therock/test_dnn_benchmarking.py diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index 63f7202e1a3..0202683a6aa 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -249,7 +249,6 @@ add_subdirectory(tools) if(HIPDNN_BUILD_PYTHON_BINDINGS) add_subdirectory(python) - add_subdirectory(tools/dnn-benchmarking) endif() if(NOT HIPDNN_SKIP_TESTS) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 257ddc2c5e4..e6f135b0c2e 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -61,29 +61,12 @@ target_link_libraries(hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_back # Installation # ============================================================================ -# Use a relative path so the install respects CMAKE_INSTALL_PREFIX. -set(HIPDNN_PYTHON_SITE_DIR - "lib/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/dist-packages" - CACHE PATH "Python module install directory (relative to CMAKE_INSTALL_PREFIX)") - -# scikit-build-core (pip wheel / pip install) install rules. # SKBUILD is set by scikit-build-core when building a wheel. if(DEFINED SKBUILD) install(TARGETS hipdnn_frontend_python DESTINATION hipdnn_frontend) install(FILES hipdnn_frontend/__init__.py DESTINATION hipdnn_frontend) else() - # Regular cmake install — place in dist-packages under CMAKE_INSTALL_PREFIX. - install(TARGETS hipdnn_frontend_python - LIBRARY DESTINATION ${HIPDNN_PYTHON_SITE_DIR}/hipdnn_frontend - ) - install(FILES hipdnn_frontend/__init__.py - DESTINATION ${HIPDNN_PYTHON_SITE_DIR}/hipdnn_frontend - ) -endif() - -# Build wheels and stage them in the install prefix. -# Skipped when scikit-build-core drives cmake (SKBUILD) to avoid recursion. -if(NOT DEFINED SKBUILD) + # Build wheel and stage it in the install prefix. install(CODE " set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") file(MAKE_DIRECTORY \"\${_wheel_dir}\") @@ -99,6 +82,5 @@ if(NOT DEFINED SKBUILD) if(_result) message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") endif() - ") endif() diff --git a/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt b/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt deleted file mode 100644 index 71125c9e181..00000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -cmake_minimum_required(VERSION 3.18) - -find_package(Python COMPONENTS Interpreter REQUIRED) - -# Build dnn-benchmarking wheel and install to staging area -install(CODE " - set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") - file(MAKE_DIRECTORY \"\${_wheel_dir}\") - - message(STATUS \"Building dnn-benchmarking wheel\") - execute_process( - COMMAND \"${Python_EXECUTABLE}\" -m pip wheel --no-deps - -w \"\${_wheel_dir}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" - RESULT_VARIABLE _result - ) - if(_result) - message(FATAL_ERROR \"Failed to build dnn-benchmarking wheel\") - endif() -") diff --git a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml index 788a7a80422..9583de29449 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml +++ b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml @@ -20,13 +20,11 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", ] +# torch is intentionally not listed here — it must come from the ROCm or CUDA +# nightly index (see requirements-{rocm,cuda}.txt) which pip cannot express +# in pyproject.toml. Everything else resolves cleanly from PyPI. dependencies = [ "numpy>=1.19.0", - "hipdnn-frontend", -] - -[project.optional-dependencies] -test = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", ] diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py b/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py index 71b73cd5277..31e673b89be 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/conftest.py @@ -167,16 +167,9 @@ def skip_if_no_gpu(): def _find_plugin_path() -> str: """Find the hipDNN engine plugin directory. - Checks DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH env var first, then - searches worktree build dir and standard install locations. + Searches worktree build dir and standard install locations. Returns the path as a string, or None if not found. """ - import os - - env_path = os.environ.get("DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH") - if env_path and Path(env_path).is_dir(): - return env_path - project_root = Path(__file__).parent.parent candidates = [ # Worktree/superbuild: relative to dnn-benchmarking tool diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py b/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py index 8322c852f21..5907203885b 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/integration/test_execution.py @@ -27,29 +27,24 @@ def _setup_hipdnn(): pytest.skip(f"PyTorch not available: {e}") try: - import os - import hipdnn_frontend - env_path = os.environ.get("DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH") - if env_path and Path(env_path).is_dir(): - hipdnn_frontend.set_engine_plugin_paths([env_path]) - else: - project_root = Path(__file__).parent.parent.parent - candidates = [ - project_root.parent.parent.parent.parent - / "dnn-providers" - / "miopen-provider" - / "build" - / "lib" - / "hipdnn_plugins" - / "engines", - Path("/opt/rocm/lib/hipdnn_plugins/engines"), - ] - for p in candidates: - if p.is_dir() and any(p.glob("*.so")): - hipdnn_frontend.set_engine_plugin_paths([str(p)]) - break + # Auto-discover and set plugin path + project_root = Path(__file__).parent.parent.parent + candidates = [ + project_root.parent.parent.parent.parent + / "dnn-providers" + / "miopen-provider" + / "build" + / "lib" + / "hipdnn_plugins" + / "engines", + Path("/opt/rocm/lib/hipdnn_plugins/engines"), + ] + for p in candidates: + if p.is_dir() and any(p.glob("*.so")): + hipdnn_frontend.set_engine_plugin_paths([str(p)]) + break hipdnn_frontend.Handle() return hipdnn_frontend diff --git a/test/therock/test_dnn_benchmarking.py b/test/therock/test_dnn_benchmarking.py deleted file mode 100644 index f9d977c2988..00000000000 --- a/test/therock/test_dnn_benchmarking.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -# Copyright Advanced Micro Devices, Inc. -# SPDX-License-Identifier: MIT - -"""TheRock test runner for dnn-benchmarking. - -Installs the dnn-benchmarking wheel into the active venv, configures the -ROCm environment from the unpacked artifact tree, and runs a curated -pytest target. - -Environment variables used: - THEROCK_BIN_DIR: Root of the unpacked artifact tree (contains lib/, bin/, etc.) - THEROCK_DIR: Repository root (fallback: three directories above this script) -""" - -import logging -import os -import platform -import shlex -import subprocess -import sys -from pathlib import Path - -THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") -SCRIPT_DIR = Path(__file__).resolve().parent -THEROCK_DIR = Path( - os.environ.get("THEROCK_DIR") or SCRIPT_DIR.parent.parent.parent -).resolve() - -logging.basicConfig(level=logging.INFO) - -if not THEROCK_BIN_DIR: - logging.error("THEROCK_BIN_DIR is not set") - sys.exit(1) - -artifact_root = Path(THEROCK_BIN_DIR).resolve().parent -bundle = artifact_root / "share" / "hipdnn" / "dnn-benchmarking" -wheelhouse = bundle / "wheels" -tests_dir = bundle / "tests" - -if not bundle.exists(): - logging.error(f"dnn-benchmarking test bundle not found: {bundle}") - sys.exit(1) -if not tests_dir.exists(): - logging.error(f"dnn-benchmarking tests not found: {tests_dir}") - sys.exit(1) - -env = os.environ.copy() -env["ROCM_PATH"] = str(artifact_root) -env["HIP_PLATFORM"] = "amd" - -workspace = Path(env.get("RUNNER_TEMP", str(artifact_root / "tmp"))) / "dnn-benchmarking" -workspace.mkdir(parents=True, exist_ok=True) -env["DNN_BENCH_WORKSPACE"] = str(workspace) -env["PYTHONPYCACHEPREFIX"] = str(workspace / "pycache") -env["XDG_CACHE_HOME"] = str(workspace / "cache") -env["MIOPEN_USER_DB_PATH"] = str(workspace / "miopen_cache") -env["MIOPEN_CUSTOM_CACHE_DIR"] = str(workspace / "miopen_cache") -env["AMD_COMGR_CACHE_DIR"] = str(workspace / "comgr_cache") - -plugin_dir = artifact_root / "lib" / "hipdnn_plugins" / "engines" -if plugin_dir.exists(): - env["DNN_BENCHMARKING_HIPDNN_PLUGIN_PATH"] = str(plugin_dir) - -if platform.system() == "Windows": - env["PATH"] = os.pathsep.join( - [str(artifact_root / "bin"), str(artifact_root / "lib"), env.get("PATH", "")] - ) -else: - env["PATH"] = os.pathsep.join( - [str(artifact_root / "bin"), env.get("PATH", "")] - ) - env["LD_LIBRARY_PATH"] = os.pathsep.join( - [ - str(artifact_root / "lib"), - str(artifact_root / "lib" / "llvm" / "lib"), - env.get("LD_LIBRARY_PATH", ""), - ] - ) - -# Install dnn-benchmarking (and hipdnn-frontend if bundled) from the wheelhouse. -install_cmd = [ - sys.executable, "-m", "pip", "install", - "--no-index", "--find-links", str(wheelhouse), - "dnn-benchmarking[test]", -] -if any(wheelhouse.glob("hipdnn_frontend-*.whl")): - install_cmd.append("hipdnn-frontend") - -logging.info(f"++ Install: {shlex.join(install_cmd)}") -subprocess.run(install_cmd, check=True, env=env) - -# Run the no-torch smoke suite: CLI parsing, graph loading, config, reporting, -# shape conversion, and non-GPU tests. These don't require torch or a GPU. -pytest_cmd = [ - sys.executable, "-m", "pytest", "-q", - str(tests_dir / "unit" / "cli"), - str(tests_dir / "unit" / "config"), - str(tests_dir / "unit" / "graph"), - str(tests_dir / "unit" / "reporting"), - str(tests_dir / "unit" / "tools"), - str(tests_dir / "unit" / "validation" / "test_comparison.py"), - str(tests_dir / "integration" / "test_graph_loading.py"), -] - -logging.info(f"++ Test: {shlex.join(pytest_cmd)}") -result = subprocess.run(pytest_cmd, env=env, cwd=str(bundle)) -sys.exit(result.returncode) From 7d98bfcfe63fdeeb141c987f2f9ab1d5364821ae Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 11:44:28 -0400 Subject: [PATCH 04/97] Revert pyproject.toml to use wheel.packages for __init__.py Let scikit-build-core handle Python files via wheel.packages instead of cmake install rules. Simplifies SKBUILD block to only install the .so. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 6 +++--- projects/hipdnn/python/pyproject.toml | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index e6f135b0c2e..82070816c9e 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -63,10 +63,10 @@ target_link_libraries(hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_back # SKBUILD is set by scikit-build-core when building a wheel. if(DEFINED SKBUILD) - install(TARGETS hipdnn_frontend_python DESTINATION hipdnn_frontend) - install(FILES hipdnn_frontend/__init__.py DESTINATION hipdnn_frontend) + # pip install . / pip wheel . → scikit-build-core packages the extension into a wheel + install(TARGETS hipdnn_frontend_python DESTINATION .) else() - # Build wheel and stage it in the install prefix. + # cmake --install → calls pip wheel to build .whl, stages it in share/hipdnn/wheels/ install(CODE " set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") file(MAKE_DIRECTORY \"\${_wheel_dir}\") diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index e3fd4537dd7..dd039e39e2a 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -51,9 +51,10 @@ build-dir = "build" cmake.minimum-version = "3.18" # Build type cmake.build-type = "Release" -# No pure-Python packages to scan; the wheel contents come entirely from -# cmake install() rules (hipdnn_frontend_python.so + __init__.py → hipdnn_frontend/). -wheel.packages = [] +# Where the Python module will be installed +wheel.install-dir = "hipdnn_frontend" +# Package data +wheel.packages = ["hipdnn_frontend"] [tool.scikit-build.cmake.define] # Pass any additional CMake definitions if needed From 0567247dec004450ac385d33c4d0a5b5f11d0a16 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 11:49:06 -0400 Subject: [PATCH 05/97] Clarify comment on target guards in python/CMakeLists.txt Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 82070816c9e..f465998904a 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -26,7 +26,8 @@ set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include # Find HIP (if needed) find_package(hip REQUIRED) -# Only find_package for hipDNN targets when built standalone +# When built as a subdirectory of hipdnn, these targets already exist. +# When built standalone (pip wheel / pip install), find the installed libraries. if(NOT TARGET hipdnn_frontend) find_package(hipdnn_frontend CONFIG REQUIRED) endif() From 462f97865160b7b9661cc5a2d3738eb06d4d2311 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 14:16:39 -0400 Subject: [PATCH 06/97] Add smoke tests for hipDNN Python bindings wheel Installs the built hipdnn-frontend wheel into a fresh venv and runs 17 smoke tests covering imports, enums, Tensor/Graph construction, method chaining, serialization helpers, and engine ID management. Co-Authored-By: Claude Opus 4 --- test/therock/test_hipdnn_python_bindings.py | 358 ++++++++++++++++++++ 1 file changed, 358 insertions(+) create mode 100644 test/therock/test_hipdnn_python_bindings.py diff --git a/test/therock/test_hipdnn_python_bindings.py b/test/therock/test_hipdnn_python_bindings.py new file mode 100644 index 00000000000..51ed012c9f0 --- /dev/null +++ b/test/therock/test_hipdnn_python_bindings.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +""" +hipDNN Python bindings wheel install and smoke test. + +This test verifies that the hipdnn-frontend wheel built by TheRock can be +installed into a fresh venv and that the basic Python API surface is +functional (import, enum access, Graph/Tensor construction, serialization). + +Environment variables: + OUTPUT_ARTIFACTS_DIR: Path to the TheRock dist/rocm output directory + that contains share/hipdnn/wheels/*.whl +""" + +import argparse +import glob +import logging +import os +import platform +import shlex +import subprocess +import sys +import tempfile +import venv +from pathlib import Path + +OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = Path( + os.environ.get("THEROCK_DIR") or SCRIPT_DIR.parent.parent.parent +).resolve() + +logging.basicConfig(level=logging.INFO) + + +def find_wheel(artifacts_path: Path) -> Path: + """Locate the hipdnn-frontend wheel under the artifacts directory.""" + wheel_dir = artifacts_path / "share" / "hipdnn" / "wheels" + wheels = sorted(wheel_dir.glob("hipdnn_frontend-*.whl")) + if not wheels: + raise FileNotFoundError( + f"No hipdnn-frontend wheel found in {wheel_dir}. " + "Ensure the build was configured with -DHIPDNN_BUILD_PYTHON_BINDINGS=ON." + ) + logging.info(f"Found wheel: {wheels[-1]}") + return wheels[-1] + + +def create_venv(venv_dir: Path) -> Path: + """Create a virtual environment and return the python executable path.""" + logging.info(f"Creating virtual environment in {venv_dir}") + venv.create(venv_dir, with_pip=True) + + if platform.system() == "Windows": + python = venv_dir / "Scripts" / "python.exe" + else: + python = venv_dir / "bin" / "python" + + if not python.exists(): + raise RuntimeError(f"venv python not found at {python}") + return python + + +def install_wheel(python: Path, wheel: Path, artifacts_path: Path) -> None: + """Install the wheel and numpy into the venv.""" + env = os.environ.copy() + + if platform.system() == "Windows": + lib_path = str(artifacts_path) + env["PATH"] = f"{lib_path};{env.get('PATH', '')}" + else: + lib_path = str(artifacts_path / "lib") + env["LD_LIBRARY_PATH"] = f"{lib_path}:{env.get('LD_LIBRARY_PATH', '')}" + + cmd = [str(python), "-m", "pip", "install", str(wheel), "numpy"] + logging.info(f"++ {shlex.join(cmd)}") + subprocess.run(cmd, check=True, env=env) + + +def run_smoke_tests(python: Path, artifacts_path: Path) -> None: + """Run inline Python smoke tests inside the venv.""" + env = os.environ.copy() + + if platform.system() == "Windows": + lib_path = str(artifacts_path) + env["PATH"] = f"{lib_path};{env.get('PATH', '')}" + else: + lib_path = str(artifacts_path / "lib") + env["LD_LIBRARY_PATH"] = f"{lib_path}:{env.get('LD_LIBRARY_PATH', '')}" + + test_script = r''' +import sys + +def test_import(): + """Verify the package can be imported.""" + import hipdnn_frontend as fe + assert hasattr(fe, "__version__"), "Missing __version__" + print(f" OK import hipdnn_frontend (version {fe.__version__})") + +def test_data_types(): + """Verify enum bindings are accessible.""" + import hipdnn_frontend as fe + for name in ["FLOAT", "HALF", "BFLOAT16", "INT8", "DOUBLE"]: + assert hasattr(fe.DataType, name), f"Missing DataType.{name}" + assert fe.DataType.FLOAT != fe.DataType.HALF + print(" OK DataType enum values accessible") + +def test_error(): + """Verify Error struct bindings.""" + import hipdnn_frontend as fe + err = fe.Error() + assert err.is_good(), "Default Error should be good" + assert not err.is_bad(), "Default Error should not be bad" + assert err.get_code() == fe.ErrorCode.OK + + err2 = fe.Error(fe.ErrorCode.INVALID_VALUE, "test error") + assert err2.is_bad() + assert "test error" in err2.get_message() + print(" OK Error struct works") + +def test_pointwise_mode(): + """Verify PointwiseMode enum.""" + import hipdnn_frontend as fe + for name in ["ADD", "MUL", "RELU_FWD", "SIGMOID_FWD", "TANH_FWD"]: + assert hasattr(fe.PointwiseMode, name), f"Missing PointwiseMode.{name}" + print(" OK PointwiseMode enum values accessible") + +def test_convolution_mode(): + """Verify ConvolutionMode enum.""" + import hipdnn_frontend as fe + assert hasattr(fe.ConvolutionMode, "CROSS_CORRELATION") + assert hasattr(fe.ConvolutionMode, "CONVOLUTION") + print(" OK ConvolutionMode enum values accessible") + +def test_tensor_create(): + """Verify Tensor creation and attribute access.""" + import hipdnn_frontend as fe + t = fe.Tensor.create([1, 3, 224, 224], fe.DataType.FLOAT) + assert t.get_dim() == [1, 3, 224, 224] + assert t.get_data_type() == fe.DataType.FLOAT + assert not t.get_is_virtual() + assert t.get_volume() == 1 * 3 * 224 * 224 + print(" OK Tensor.create works") + +def test_tensor_attributes(): + """Verify Tensor setters and method chaining.""" + import hipdnn_frontend as fe + t = fe.Tensor() + result = t.set_dim([2, 64, 32, 32]).set_data_type(fe.DataType.HALF).set_name("input") + assert result is t, "Setters should return self for chaining" + assert t.get_name() == "input" + assert t.get_dim() == [2, 64, 32, 32] + assert t.get_data_type() == fe.DataType.HALF + print(" OK Tensor attribute setters and chaining work") + +def test_tensor_uid(): + """Verify Tensor UID management.""" + import hipdnn_frontend as fe + t = fe.Tensor.create([1, 1], fe.DataType.FLOAT) + assert not t.has_uid(), "New tensor should not have UID" + t.set_uid(42) + assert t.has_uid() + assert t.get_uid() == 42 + t.clear_uid() + assert not t.has_uid() + print(" OK Tensor UID management works") + +def test_tensor_virtual(): + """Verify virtual tensor flag.""" + import hipdnn_frontend as fe + t = fe.Tensor.create([1, 1], fe.DataType.FLOAT) + t.set_is_virtual(True) + assert t.get_is_virtual() + t.set_is_virtual(False) + assert not t.get_is_virtual() + print(" OK Tensor virtual flag works") + +def test_graph_create(): + """Verify Graph construction and attribute setting.""" + import hipdnn_frontend as fe + g = fe.Graph() + g.set_name("test_graph") + g.set_compute_data_type(fe.DataType.FLOAT) + g.set_io_data_type(fe.DataType.FLOAT) + + assert g.get_name() == "test_graph" + assert g.get_compute_data_type() == fe.DataType.FLOAT + assert g.get_io_data_type() == fe.DataType.FLOAT + print(" OK Graph creation and attributes work") + +def test_graph_chaining(): + """Verify Graph method chaining.""" + import hipdnn_frontend as fe + g = fe.Graph() + result = ( + g.set_name("chained") + .set_compute_data_type(fe.DataType.FLOAT) + .set_io_data_type(fe.DataType.HALF) + .set_intermediate_data_type(fe.DataType.FLOAT) + ) + assert result is g + assert g.get_name() == "chained" + assert g.get_io_data_type() == fe.DataType.HALF + assert g.get_intermediate_data_type() == fe.DataType.FLOAT + print(" OK Graph method chaining works") + +def test_graph_tensor(): + """Verify creating tensors through the graph.""" + import hipdnn_frontend as fe + g = fe.Graph() + g.set_compute_data_type(fe.DataType.FLOAT).set_io_data_type(fe.DataType.FLOAT) + + t = g.tensor([1, 3, 224, 224], "input_tensor") + assert t is not None + assert t.get_name() == "input_tensor" + assert t.get_dim() == [1, 3, 224, 224] + print(" OK Graph.tensor() works") + +def test_graph_tensor_like(): + """Verify tensor_like static method.""" + import hipdnn_frontend as fe + t = fe.Tensor.create([2, 64, 16, 16], fe.DataType.HALF) + t.set_name("original") + + copy = fe.Graph.tensor_like(t, "copy_tensor") + assert copy.get_dim() == [2, 64, 16, 16] + assert copy.get_data_type() == fe.DataType.HALF + assert copy.get_name() == "copy_tensor" + print(" OK Graph.tensor_like() works") + +def test_engine_id_to_name(): + """Verify engine_id_to_name function.""" + import hipdnn_frontend as fe + result = fe.engine_id_to_name(999999) + assert isinstance(result, str) + print(" OK engine_id_to_name works") + +def test_preferred_engine_id(): + """Verify preferred engine ID set/get/clear.""" + import hipdnn_frontend as fe + g = fe.Graph() + + assert g.get_preferred_engine_id_ext() is None + + g.set_preferred_engine_id_ext(12345) + assert g.get_preferred_engine_id_ext() == 12345 + + g.set_preferred_engine_id_ext(None) + assert g.get_preferred_engine_id_ext() is None + print(" OK Preferred engine ID management works") + +def test_plugin_loading_mode(): + """Verify PluginLoadingMode enum.""" + import hipdnn_frontend as fe + assert hasattr(fe, "PluginLoadingMode") + assert hasattr(fe.PluginLoadingMode, "ADDITIVE") + assert hasattr(fe.PluginLoadingMode, "ABSOLUTE") + print(" OK PluginLoadingMode enum accessible") + +def test_heuristic_mode(): + """Verify HeuristicMode enum.""" + import hipdnn_frontend as fe + assert hasattr(fe.HeuristicMode, "FALLBACK") + print(" OK HeuristicMode enum accessible") + +def main(): + print("=" * 60) + print("hipDNN Python bindings smoke tests") + print("=" * 60) + + tests = [ + test_import, + test_data_types, + test_error, + test_pointwise_mode, + test_convolution_mode, + test_tensor_create, + test_tensor_attributes, + test_tensor_uid, + test_tensor_virtual, + test_graph_create, + test_graph_chaining, + test_graph_tensor, + test_graph_tensor_like, + test_engine_id_to_name, + test_preferred_engine_id, + test_plugin_loading_mode, + test_heuristic_mode, + ] + + passed = 0 + failed = 0 + for test in tests: + try: + test() + passed += 1 + except Exception as e: + print(f" FAIL {test.__name__}: {e}") + import traceback + traceback.print_exc() + failed += 1 + + print("=" * 60) + print(f"Results: {passed} passed, {failed} failed, {len(tests)} total") + print("=" * 60) + + if failed: + sys.exit(1) + +if __name__ == "__main__": + main() +''' + + cmd = [str(python), "-c", test_script] + logging.info("Running smoke tests...") + subprocess.run(cmd, check=True, env=env) + + +def run_tests(artifacts_path: Path, venv_dir: Path) -> None: + """Find wheel, create venv, install, and run tests.""" + wheel = find_wheel(artifacts_path) + python = create_venv(venv_dir) + install_wheel(python, wheel, artifacts_path) + run_smoke_tests(python, artifacts_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Install and test hipDNN Python bindings wheel" + ) + parser.add_argument( + "--venv-dir", + type=Path, + help="Directory for the test virtual environment. " + "If not specified, uses a temporary directory that is auto-deleted.", + ) + args = parser.parse_args() + + if not OUTPUT_ARTIFACTS_DIR: + raise RuntimeError("OUTPUT_ARTIFACTS_DIR environment variable not set") + + artifacts_path = Path(OUTPUT_ARTIFACTS_DIR).resolve() + logging.info(f"Using OUTPUT_ARTIFACTS_DIR: {artifacts_path}") + + if args.venv_dir: + venv_dir = args.venv_dir.resolve() + venv_dir.mkdir(parents=True, exist_ok=True) + logging.info(f"Using persistent venv directory: {venv_dir}") + run_tests(artifacts_path, venv_dir) + logging.info(f"Venv retained in: {venv_dir}") + else: + logging.info("Using temporary venv directory (auto-cleanup)") + with tempfile.TemporaryDirectory() as temp_dir: + run_tests(artifacts_path, Path(temp_dir) / "venv") + + logging.info("All hipDNN Python binding tests passed!") From 5def8a0c2f4ecfeb24bde221984d7b5d1b2c619f Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 14:22:53 -0400 Subject: [PATCH 07/97] Remove hipDNN Python bindings test (moved to TheRock) Co-Authored-By: Claude Opus 4 --- test/therock/test_hipdnn_python_bindings.py | 358 -------------------- 1 file changed, 358 deletions(-) delete mode 100644 test/therock/test_hipdnn_python_bindings.py diff --git a/test/therock/test_hipdnn_python_bindings.py b/test/therock/test_hipdnn_python_bindings.py deleted file mode 100644 index 51ed012c9f0..00000000000 --- a/test/therock/test_hipdnn_python_bindings.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -# Copyright Advanced Micro Devices, Inc. -# SPDX-License-Identifier: MIT - -""" -hipDNN Python bindings wheel install and smoke test. - -This test verifies that the hipdnn-frontend wheel built by TheRock can be -installed into a fresh venv and that the basic Python API surface is -functional (import, enum access, Graph/Tensor construction, serialization). - -Environment variables: - OUTPUT_ARTIFACTS_DIR: Path to the TheRock dist/rocm output directory - that contains share/hipdnn/wheels/*.whl -""" - -import argparse -import glob -import logging -import os -import platform -import shlex -import subprocess -import sys -import tempfile -import venv -from pathlib import Path - -OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR") -SCRIPT_DIR = Path(__file__).resolve().parent -THEROCK_DIR = Path( - os.environ.get("THEROCK_DIR") or SCRIPT_DIR.parent.parent.parent -).resolve() - -logging.basicConfig(level=logging.INFO) - - -def find_wheel(artifacts_path: Path) -> Path: - """Locate the hipdnn-frontend wheel under the artifacts directory.""" - wheel_dir = artifacts_path / "share" / "hipdnn" / "wheels" - wheels = sorted(wheel_dir.glob("hipdnn_frontend-*.whl")) - if not wheels: - raise FileNotFoundError( - f"No hipdnn-frontend wheel found in {wheel_dir}. " - "Ensure the build was configured with -DHIPDNN_BUILD_PYTHON_BINDINGS=ON." - ) - logging.info(f"Found wheel: {wheels[-1]}") - return wheels[-1] - - -def create_venv(venv_dir: Path) -> Path: - """Create a virtual environment and return the python executable path.""" - logging.info(f"Creating virtual environment in {venv_dir}") - venv.create(venv_dir, with_pip=True) - - if platform.system() == "Windows": - python = venv_dir / "Scripts" / "python.exe" - else: - python = venv_dir / "bin" / "python" - - if not python.exists(): - raise RuntimeError(f"venv python not found at {python}") - return python - - -def install_wheel(python: Path, wheel: Path, artifacts_path: Path) -> None: - """Install the wheel and numpy into the venv.""" - env = os.environ.copy() - - if platform.system() == "Windows": - lib_path = str(artifacts_path) - env["PATH"] = f"{lib_path};{env.get('PATH', '')}" - else: - lib_path = str(artifacts_path / "lib") - env["LD_LIBRARY_PATH"] = f"{lib_path}:{env.get('LD_LIBRARY_PATH', '')}" - - cmd = [str(python), "-m", "pip", "install", str(wheel), "numpy"] - logging.info(f"++ {shlex.join(cmd)}") - subprocess.run(cmd, check=True, env=env) - - -def run_smoke_tests(python: Path, artifacts_path: Path) -> None: - """Run inline Python smoke tests inside the venv.""" - env = os.environ.copy() - - if platform.system() == "Windows": - lib_path = str(artifacts_path) - env["PATH"] = f"{lib_path};{env.get('PATH', '')}" - else: - lib_path = str(artifacts_path / "lib") - env["LD_LIBRARY_PATH"] = f"{lib_path}:{env.get('LD_LIBRARY_PATH', '')}" - - test_script = r''' -import sys - -def test_import(): - """Verify the package can be imported.""" - import hipdnn_frontend as fe - assert hasattr(fe, "__version__"), "Missing __version__" - print(f" OK import hipdnn_frontend (version {fe.__version__})") - -def test_data_types(): - """Verify enum bindings are accessible.""" - import hipdnn_frontend as fe - for name in ["FLOAT", "HALF", "BFLOAT16", "INT8", "DOUBLE"]: - assert hasattr(fe.DataType, name), f"Missing DataType.{name}" - assert fe.DataType.FLOAT != fe.DataType.HALF - print(" OK DataType enum values accessible") - -def test_error(): - """Verify Error struct bindings.""" - import hipdnn_frontend as fe - err = fe.Error() - assert err.is_good(), "Default Error should be good" - assert not err.is_bad(), "Default Error should not be bad" - assert err.get_code() == fe.ErrorCode.OK - - err2 = fe.Error(fe.ErrorCode.INVALID_VALUE, "test error") - assert err2.is_bad() - assert "test error" in err2.get_message() - print(" OK Error struct works") - -def test_pointwise_mode(): - """Verify PointwiseMode enum.""" - import hipdnn_frontend as fe - for name in ["ADD", "MUL", "RELU_FWD", "SIGMOID_FWD", "TANH_FWD"]: - assert hasattr(fe.PointwiseMode, name), f"Missing PointwiseMode.{name}" - print(" OK PointwiseMode enum values accessible") - -def test_convolution_mode(): - """Verify ConvolutionMode enum.""" - import hipdnn_frontend as fe - assert hasattr(fe.ConvolutionMode, "CROSS_CORRELATION") - assert hasattr(fe.ConvolutionMode, "CONVOLUTION") - print(" OK ConvolutionMode enum values accessible") - -def test_tensor_create(): - """Verify Tensor creation and attribute access.""" - import hipdnn_frontend as fe - t = fe.Tensor.create([1, 3, 224, 224], fe.DataType.FLOAT) - assert t.get_dim() == [1, 3, 224, 224] - assert t.get_data_type() == fe.DataType.FLOAT - assert not t.get_is_virtual() - assert t.get_volume() == 1 * 3 * 224 * 224 - print(" OK Tensor.create works") - -def test_tensor_attributes(): - """Verify Tensor setters and method chaining.""" - import hipdnn_frontend as fe - t = fe.Tensor() - result = t.set_dim([2, 64, 32, 32]).set_data_type(fe.DataType.HALF).set_name("input") - assert result is t, "Setters should return self for chaining" - assert t.get_name() == "input" - assert t.get_dim() == [2, 64, 32, 32] - assert t.get_data_type() == fe.DataType.HALF - print(" OK Tensor attribute setters and chaining work") - -def test_tensor_uid(): - """Verify Tensor UID management.""" - import hipdnn_frontend as fe - t = fe.Tensor.create([1, 1], fe.DataType.FLOAT) - assert not t.has_uid(), "New tensor should not have UID" - t.set_uid(42) - assert t.has_uid() - assert t.get_uid() == 42 - t.clear_uid() - assert not t.has_uid() - print(" OK Tensor UID management works") - -def test_tensor_virtual(): - """Verify virtual tensor flag.""" - import hipdnn_frontend as fe - t = fe.Tensor.create([1, 1], fe.DataType.FLOAT) - t.set_is_virtual(True) - assert t.get_is_virtual() - t.set_is_virtual(False) - assert not t.get_is_virtual() - print(" OK Tensor virtual flag works") - -def test_graph_create(): - """Verify Graph construction and attribute setting.""" - import hipdnn_frontend as fe - g = fe.Graph() - g.set_name("test_graph") - g.set_compute_data_type(fe.DataType.FLOAT) - g.set_io_data_type(fe.DataType.FLOAT) - - assert g.get_name() == "test_graph" - assert g.get_compute_data_type() == fe.DataType.FLOAT - assert g.get_io_data_type() == fe.DataType.FLOAT - print(" OK Graph creation and attributes work") - -def test_graph_chaining(): - """Verify Graph method chaining.""" - import hipdnn_frontend as fe - g = fe.Graph() - result = ( - g.set_name("chained") - .set_compute_data_type(fe.DataType.FLOAT) - .set_io_data_type(fe.DataType.HALF) - .set_intermediate_data_type(fe.DataType.FLOAT) - ) - assert result is g - assert g.get_name() == "chained" - assert g.get_io_data_type() == fe.DataType.HALF - assert g.get_intermediate_data_type() == fe.DataType.FLOAT - print(" OK Graph method chaining works") - -def test_graph_tensor(): - """Verify creating tensors through the graph.""" - import hipdnn_frontend as fe - g = fe.Graph() - g.set_compute_data_type(fe.DataType.FLOAT).set_io_data_type(fe.DataType.FLOAT) - - t = g.tensor([1, 3, 224, 224], "input_tensor") - assert t is not None - assert t.get_name() == "input_tensor" - assert t.get_dim() == [1, 3, 224, 224] - print(" OK Graph.tensor() works") - -def test_graph_tensor_like(): - """Verify tensor_like static method.""" - import hipdnn_frontend as fe - t = fe.Tensor.create([2, 64, 16, 16], fe.DataType.HALF) - t.set_name("original") - - copy = fe.Graph.tensor_like(t, "copy_tensor") - assert copy.get_dim() == [2, 64, 16, 16] - assert copy.get_data_type() == fe.DataType.HALF - assert copy.get_name() == "copy_tensor" - print(" OK Graph.tensor_like() works") - -def test_engine_id_to_name(): - """Verify engine_id_to_name function.""" - import hipdnn_frontend as fe - result = fe.engine_id_to_name(999999) - assert isinstance(result, str) - print(" OK engine_id_to_name works") - -def test_preferred_engine_id(): - """Verify preferred engine ID set/get/clear.""" - import hipdnn_frontend as fe - g = fe.Graph() - - assert g.get_preferred_engine_id_ext() is None - - g.set_preferred_engine_id_ext(12345) - assert g.get_preferred_engine_id_ext() == 12345 - - g.set_preferred_engine_id_ext(None) - assert g.get_preferred_engine_id_ext() is None - print(" OK Preferred engine ID management works") - -def test_plugin_loading_mode(): - """Verify PluginLoadingMode enum.""" - import hipdnn_frontend as fe - assert hasattr(fe, "PluginLoadingMode") - assert hasattr(fe.PluginLoadingMode, "ADDITIVE") - assert hasattr(fe.PluginLoadingMode, "ABSOLUTE") - print(" OK PluginLoadingMode enum accessible") - -def test_heuristic_mode(): - """Verify HeuristicMode enum.""" - import hipdnn_frontend as fe - assert hasattr(fe.HeuristicMode, "FALLBACK") - print(" OK HeuristicMode enum accessible") - -def main(): - print("=" * 60) - print("hipDNN Python bindings smoke tests") - print("=" * 60) - - tests = [ - test_import, - test_data_types, - test_error, - test_pointwise_mode, - test_convolution_mode, - test_tensor_create, - test_tensor_attributes, - test_tensor_uid, - test_tensor_virtual, - test_graph_create, - test_graph_chaining, - test_graph_tensor, - test_graph_tensor_like, - test_engine_id_to_name, - test_preferred_engine_id, - test_plugin_loading_mode, - test_heuristic_mode, - ] - - passed = 0 - failed = 0 - for test in tests: - try: - test() - passed += 1 - except Exception as e: - print(f" FAIL {test.__name__}: {e}") - import traceback - traceback.print_exc() - failed += 1 - - print("=" * 60) - print(f"Results: {passed} passed, {failed} failed, {len(tests)} total") - print("=" * 60) - - if failed: - sys.exit(1) - -if __name__ == "__main__": - main() -''' - - cmd = [str(python), "-c", test_script] - logging.info("Running smoke tests...") - subprocess.run(cmd, check=True, env=env) - - -def run_tests(artifacts_path: Path, venv_dir: Path) -> None: - """Find wheel, create venv, install, and run tests.""" - wheel = find_wheel(artifacts_path) - python = create_venv(venv_dir) - install_wheel(python, wheel, artifacts_path) - run_smoke_tests(python, artifacts_path) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Install and test hipDNN Python bindings wheel" - ) - parser.add_argument( - "--venv-dir", - type=Path, - help="Directory for the test virtual environment. " - "If not specified, uses a temporary directory that is auto-deleted.", - ) - args = parser.parse_args() - - if not OUTPUT_ARTIFACTS_DIR: - raise RuntimeError("OUTPUT_ARTIFACTS_DIR environment variable not set") - - artifacts_path = Path(OUTPUT_ARTIFACTS_DIR).resolve() - logging.info(f"Using OUTPUT_ARTIFACTS_DIR: {artifacts_path}") - - if args.venv_dir: - venv_dir = args.venv_dir.resolve() - venv_dir.mkdir(parents=True, exist_ok=True) - logging.info(f"Using persistent venv directory: {venv_dir}") - run_tests(artifacts_path, venv_dir) - logging.info(f"Venv retained in: {venv_dir}") - else: - logging.info("Using temporary venv directory (auto-cleanup)") - with tempfile.TemporaryDirectory() as temp_dir: - run_tests(artifacts_path, Path(temp_dir) / "venv") - - logging.info("All hipDNN Python binding tests passed!") From fae98b85e49419359e925052ad29983cddc68a89 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 12 May 2026 16:05:35 -0400 Subject: [PATCH 08/97] Use find_package for nanobind, fall back to FetchContent When built via TheRock superbuild, nanobind is provided as a third-party dependency. For standalone builds, FetchContent fetches it from GitHub. Also bumps FetchContent tag from v1.8.0 to v2.4.0. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index f465998904a..7efb450a121 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -13,10 +13,12 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Find Python and nanobind find_package(Python COMPONENTS Interpreter Development REQUIRED) -# FetchContent to get nanobind if not found -include(FetchContent) -fetchcontent_declare(nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind GIT_TAG v1.8.0) -fetchcontent_makeavailable(nanobind) +find_package(nanobind CONFIG QUIET) +if(NOT nanobind_FOUND) + include(FetchContent) + fetchcontent_declare(nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind GIT_TAG v2.4.0) + fetchcontent_makeavailable(nanobind) +endif() # Find hipDNN frontend headers set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include" From d0aae6233521182cb5b88126b1f9500ae981e4cc Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 15:32:39 -0400 Subject: [PATCH 09/97] Use hipdnn_add_dependency for nanobind and tsl-robin-map in python bindings Replace inline FetchContent for nanobind with the project's hipdnn_add_dependency pattern so find-or-fetch behavior is consistent with all other dependencies. Add tsl-robin-map as a separate dependency since nanobind is configured with NB_USE_SUBMODULE_DEPS OFF. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/CMakeLists.txt | 4 ++- projects/hipdnn/cmake/Dependencies.cmake | 38 +++++++++++++++++++++++- projects/hipdnn/python/CMakeLists.txt | 8 ++--- projects/hipdnn/python/pyproject.toml | 2 +- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index 0202683a6aa..f89aed8b19b 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -8,6 +8,8 @@ set(HIPDNN_SPDLOG_VERSION "1.15.3" CACHE STRING "Version of spdlog to use") set(HIPDNN_FLATBUFFERS_VERSION "25.9.23" CACHE STRING "Version of flatbuffers to use") set(HIPDNN_NLOHMANN_JSON_VERSION "3.12.0" CACHE STRING "Version of nlohmann_json to use") set(HIPDNN_GTEST_VERSION "1.16.0" CACHE STRING "Version of googletest to use") +set(HIPDNN_NANOBIND_VERSION "2.4.0" CACHE STRING "Version of nanobind to use") +set(HIPDNN_TSL_ROBIN_MAP_VERSION "1.3.0" CACHE STRING "Version of tsl-robin-map to use") option(HIPDNN_SKIP_JSON_LIB "Defines whether SDK public headers should not include nlohmann/json.hpp." OFF) @@ -153,7 +155,7 @@ if(DEFINED HIP_DNN_SKIP_TESTS) endif() option(HIPDNN_SKIP_TESTS "Skips building all tests" OFF) -option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" OFF) +option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" ON) option(HIPDNN_ENABLE_COVERAGE "Build with code coverage flags" OFF) option(BUILD_ADDRESS_SANITIZER "Build with Address Sanitizer enabled" OFF) diff --git a/projects/hipdnn/cmake/Dependencies.cmake b/projects/hipdnn/cmake/Dependencies.cmake index 62504066335..d2bc827165a 100644 --- a/projects/hipdnn/cmake/Dependencies.cmake +++ b/projects/hipdnn/cmake/Dependencies.cmake @@ -14,7 +14,7 @@ if(HIPDNN_NO_DOWNLOAD) endif() # Dependencies where the local version should be used, if available -set(_hipdnn_all_local_deps GTest flatbuffers spdlog nlohmann_json) +set(_hipdnn_all_local_deps GTest flatbuffers spdlog nlohmann_json nanobind tsl-robin-map) # Dependencies where we never look for a local version set(_hipdnn_all_remote_deps) @@ -247,6 +247,42 @@ function(_fetch_nlohmann_json VERSION HASH) endfunction() +# Fetches tsl-robin-map +function(_fetch_tsl-robin-map VERSION HASH) + _determine_git_tag(v v1.3.0) + + fetchcontent_declare( + tsl-robin-map + GIT_REPOSITORY https://github.com/Tessil/robin-map.git + GIT_TAG ${GIT_TAG} + DOWNLOAD_EXTRACT_TIMESTAMP + TRUE + ) + + fetchcontent_makeavailable(tsl-robin-map) + + _exclude_from_all(${tsl-robin-map_SOURCE_DIR}) + _mark_targets_as_system(${tsl-robin-map_SOURCE_DIR}) +endfunction() + +# Fetches nanobind +function(_fetch_nanobind VERSION HASH) + _determine_git_tag(v v2.4.0) + + set(NB_USE_SUBMODULE_DEPS OFF) + + fetchcontent_declare( + nanobind + GIT_REPOSITORY https://github.com/wjakob/nanobind.git + GIT_TAG ${GIT_TAG} + ) + + fetchcontent_makeavailable(nanobind) + + _exclude_from_all(${nanobind_SOURCE_DIR}) + _mark_targets_as_system(${nanobind_SOURCE_DIR}) +endfunction() + # Utility functions, pulled from rocroller repo # # Determines the git tag to use diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 7efb450a121..0254a1fa5da 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -13,12 +13,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Find Python and nanobind find_package(Python COMPONENTS Interpreter Development REQUIRED) -find_package(nanobind CONFIG QUIET) -if(NOT nanobind_FOUND) - include(FetchContent) - fetchcontent_declare(nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind GIT_TAG v2.4.0) - fetchcontent_makeavailable(nanobind) -endif() +hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) +hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) # Find hipDNN frontend headers set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include" diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index dd039e39e2a..4716f5cb1cf 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -4,7 +4,7 @@ [build-system] requires = [ "scikit-build-core>=0.4.3", - "nanobind>=1.8.0", + "nanobind>=2.6.1", "cmake>=3.18", ] build-backend = "scikit_build_core.build" From b99478e50b455e27d60c22faf7f16fe8dec494c0 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 15:38:25 -0400 Subject: [PATCH 10/97] Align nanobind version in pyproject.toml with CMake (2.4.0) Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 4716f5cb1cf..bcb52827727 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -4,7 +4,7 @@ [build-system] requires = [ "scikit-build-core>=0.4.3", - "nanobind>=2.6.1", + "nanobind>=2.4.0", "cmake>=3.18", ] build-backend = "scikit_build_core.build" From db1940f430f00bdbf874c6cb89ab0b51548b3e1d Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 17:16:20 -0400 Subject: [PATCH 11/97] Fix clang-tidy errors in Python bindings Exclude Python, nanobind, and _deps headers from clang-tidy's header filter to prevent false positives on third-party code. Rename binding functions from snake_case to camelBack to match project naming conventions, and add braces around bare if/else bodies. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/.clang-tidy | 2 +- .../hipdnn/python/src/attributes_bindings.cpp | 2 +- projects/hipdnn/python/src/graph_bindings.cpp | 2 +- .../hipdnn/python/src/handle_bindings.cpp | 2 +- .../hipdnn/python/src/memory_bindings.cpp | 32 +++++++++++++------ projects/hipdnn/python/src/module.cpp | 24 +++++++------- .../hipdnn/python/src/tensor_bindings.cpp | 2 +- projects/hipdnn/python/src/types_bindings.cpp | 2 +- 8 files changed, 41 insertions(+), 27 deletions(-) diff --git a/projects/hipdnn/.clang-tidy b/projects/hipdnn/.clang-tidy index 4cf2f589585..18b0a43d7a4 100644 --- a/projects/hipdnn/.clang-tidy +++ b/projects/hipdnn/.clang-tidy @@ -47,7 +47,7 @@ HeaderFileExtensions: ['h','hpp'] # This regex is a inclusion filter for the files to be checked. Its a Posix regex and # does not seem to support negative lookahead. HeaderFilterRegex: '.' -ExcludeHeaderFilterRegex: '^(.*data_objects.*)$' +ExcludeHeaderFilterRegex: '^(.*(data_objects|python3\.[0-9]+|nanobind|_deps).*)$' CheckOptions: - { key: readability-identifier-naming.NamespaceCase, value: lower_case } diff --git a/projects/hipdnn/python/src/attributes_bindings.cpp b/projects/hipdnn/python/src/attributes_bindings.cpp index 681cf9d1b27..67cdb5ff689 100644 --- a/projects/hipdnn/python/src/attributes_bindings.cpp +++ b/projects/hipdnn/python/src/attributes_bindings.cpp @@ -16,7 +16,7 @@ namespace nb = nanobind; using namespace hipdnn_frontend; -void attributes_bindings(nb::module_& m) +void attributesBindings(nb::module_& m) { // BatchnormAttributes nb::class_(m, "BatchnormAttributes") diff --git a/projects/hipdnn/python/src/graph_bindings.cpp b/projects/hipdnn/python/src/graph_bindings.cpp index 95312644cd7..6d8c4c56c6c 100644 --- a/projects/hipdnn/python/src/graph_bindings.cpp +++ b/projects/hipdnn/python/src/graph_bindings.cpp @@ -19,7 +19,7 @@ namespace nb = nanobind; using namespace hipdnn_frontend; -void graph_bindings(nb::module_& m) +void graphBindings(nb::module_& m) { nb::class_(m, "Graph") .def(nb::init<>()) diff --git a/projects/hipdnn/python/src/handle_bindings.cpp b/projects/hipdnn/python/src/handle_bindings.cpp index 2c046831bb2..072eefb1707 100644 --- a/projects/hipdnn/python/src/handle_bindings.cpp +++ b/projects/hipdnn/python/src/handle_bindings.cpp @@ -84,7 +84,7 @@ class HandleWrapper } }; -void handle_bindings(nb::module_& m) +void handleBindings(nb::module_& m) { nb::class_(m, "Handle") .def(nb::init<>(), "Create a new hipdnn handle") diff --git a/projects/hipdnn/python/src/memory_bindings.cpp b/projects/hipdnn/python/src/memory_bindings.cpp index 40b1c3bc324..24eb39b422f 100644 --- a/projects/hipdnn/python/src/memory_bindings.cpp +++ b/projects/hipdnn/python/src/memory_bindings.cpp @@ -122,7 +122,7 @@ class DeviceBuffer } }; -void memory_bindings(nb::module_& m) +void memoryBindings(nb::module_& m) { nb::class_(m, "DeviceBuffer") .def(nb::init(), @@ -163,22 +163,36 @@ void memory_bindings(nb::module_& m) m.def( "get_dtype_size", [](nb::object dtype) -> size_t { - std::string dtype_str = nb::str(dtype).c_str(); + std::string dtypeStr = nb::str(dtype).c_str(); - if(dtype_str == "(m, "Tensor") .def(nb::init<>()) diff --git a/projects/hipdnn/python/src/types_bindings.cpp b/projects/hipdnn/python/src/types_bindings.cpp index cd85b7c236a..fed8f5d89a9 100644 --- a/projects/hipdnn/python/src/types_bindings.cpp +++ b/projects/hipdnn/python/src/types_bindings.cpp @@ -10,7 +10,7 @@ namespace nb = nanobind; using namespace hipdnn_frontend; -void types_bindings(nb::module_& m) +void typesBindings(nb::module_& m) { // Bind DataType enum nb::enum_(m, "DataType") From 5ef247729054c267c278b67c2c61f85ff39076e7 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 17:34:54 -0400 Subject: [PATCH 12/97] Fix remaining clang-tidy warnings in Python bindings Add shared bindings.hpp header with forward declarations to resolve misc-use-internal-linkage. Suppress unavoidable performance-no-int-to-ptr for Python FFI pointer casts. Use const references for nb::object and nb::bytes parameters. Add const to status/error variables and use auto with reinterpret_cast. Co-Authored-By: Claude Opus 4 --- .../hipdnn/python/src/attributes_bindings.cpp | 2 + projects/hipdnn/python/src/bindings.hpp | 13 ++++++ projects/hipdnn/python/src/graph_bindings.cpp | 43 +++++++++++-------- .../hipdnn/python/src/handle_bindings.cpp | 12 ++++-- .../hipdnn/python/src/memory_bindings.cpp | 16 ++++--- projects/hipdnn/python/src/module.cpp | 13 ++---- .../hipdnn/python/src/tensor_bindings.cpp | 2 + projects/hipdnn/python/src/types_bindings.cpp | 2 + 8 files changed, 63 insertions(+), 40 deletions(-) create mode 100644 projects/hipdnn/python/src/bindings.hpp diff --git a/projects/hipdnn/python/src/attributes_bindings.cpp b/projects/hipdnn/python/src/attributes_bindings.cpp index 67cdb5ff689..e7893c834b3 100644 --- a/projects/hipdnn/python/src/attributes_bindings.cpp +++ b/projects/hipdnn/python/src/attributes_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include diff --git a/projects/hipdnn/python/src/bindings.hpp b/projects/hipdnn/python/src/bindings.hpp new file mode 100644 index 00000000000..6389e864bc1 --- /dev/null +++ b/projects/hipdnn/python/src/bindings.hpp @@ -0,0 +1,13 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +void graphBindings(nanobind::module_& m); +void tensorBindings(nanobind::module_& m); +void attributesBindings(nanobind::module_& m); +void typesBindings(nanobind::module_& m); +void handleBindings(nanobind::module_& m); +void memoryBindings(nanobind::module_& m); diff --git a/projects/hipdnn/python/src/graph_bindings.cpp b/projects/hipdnn/python/src/graph_bindings.cpp index 6d8c4c56c6c..44df523d95e 100644 --- a/projects/hipdnn/python/src/graph_bindings.cpp +++ b/projects/hipdnn/python/src/graph_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include @@ -28,10 +30,10 @@ void graphBindings(nb::module_& m) .def("topologicallySortGraph", &graph::Graph::topologicallySortGraph) .def( "build_operation_graph", - [](graph::Graph& g, nb::object handle) { - // Extract handle pointer from Python Handle object + [](graph::Graph& g, const nb::object& handle) { auto handlePtr = handle.attr("get")(); - hipdnnHandle_t rawHandle + // NOLINTNEXTLINE(performance-no-int-to-ptr) + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); return g.build_operation_graph(rawHandle); }, @@ -45,7 +47,7 @@ void graphBindings(nb::module_& m) "get_ranked_engine_ids", [](graph::Graph& g, const std::vector& modes) { std::vector ids; - auto err = g.get_ranked_engine_ids(ids, modes); + const auto err = g.get_ranked_engine_ids(ids, modes); if(err.is_bad()) { throw std::runtime_error("Failed to get ranked engine ids: " @@ -61,8 +63,8 @@ void graphBindings(nb::module_& m) .def( "get_workspace_size", [](const graph::Graph& g) { - int64_t workspaceSize; - auto result = g.get_workspace_size(workspaceSize); + int64_t workspaceSize = 0; + const auto result = g.get_workspace_size(workspaceSize); if(!result.is_good()) { throw std::runtime_error("Failed to get workspace size: " @@ -74,21 +76,22 @@ void graphBindings(nb::module_& m) .def( "execute", [](const graph::Graph& g, - nb::object handle, + const nb::object& handle, std::unordered_map& variantPack, uintptr_t workspace) { - // Extract handle pointer from Python Handle object auto handlePtr = handle.attr("get")(); - hipdnnHandle_t rawHandle + // NOLINTNEXTLINE(performance-no-int-to-ptr) + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); - // Convert Python integer pointers to void* std::unordered_map cppVariantPack; for(const auto& [key, value] : variantPack) { + // NOLINTNEXTLINE(performance-no-int-to-ptr) cppVariantPack[key] = reinterpret_cast(value); } + // NOLINTNEXTLINE(performance-no-int-to-ptr) void* workspacePtr = workspace ? reinterpret_cast(workspace) : nullptr; return g.execute(rawHandle, cppVariantPack, workspacePtr); @@ -160,9 +163,10 @@ void graphBindings(nb::module_& m) "Serialize the graph to a JSON string") .def( "from_json", - [](graph::Graph& g, nb::object handle, const std::string& jsonStr) { + [](graph::Graph& g, const nb::object& handle, const std::string& jsonStr) { auto handlePtr = handle.attr("get")(); - hipdnnHandle_t rawHandle + // NOLINTNEXTLINE(performance-no-int-to-ptr) + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); return g.deserialize(rawHandle, jsonStr); }, @@ -190,12 +194,13 @@ void graphBindings(nb::module_& m) "Serialize the graph to binary bytes") .def( "from_binary", - [](graph::Graph& g, nb::object handle, nb::bytes data) { + [](graph::Graph& g, const nb::object& handle, const nb::bytes& data) { auto handlePtr = handle.attr("get")(); - hipdnnHandle_t rawHandle + // NOLINTNEXTLINE(performance-no-int-to-ptr) + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); - auto* ptr = reinterpret_cast(data.c_str()); - std::vector vec(ptr, ptr + data.size()); + const auto* ptr = reinterpret_cast(data.c_str()); + const std::vector vec(ptr, ptr + data.size()); return g.deserialize(rawHandle, vec); }, nb::arg("handle"), @@ -204,9 +209,9 @@ void graphBindings(nb::module_& m) "The graph is ready for create_execution_plans() after this call.") .def( "from_binary", - [](graph::Graph& g, nb::bytes data) { - auto* ptr = reinterpret_cast(data.c_str()); - std::vector vec(ptr, ptr + data.size()); + [](graph::Graph& g, const nb::bytes& data) { + const auto* ptr = reinterpret_cast(data.c_str()); + const std::vector vec(ptr, ptr + data.size()); return g.deserialize(vec); }, nb::arg("data"), diff --git a/projects/hipdnn/python/src/handle_bindings.cpp b/projects/hipdnn/python/src/handle_bindings.cpp index 072eefb1707..411f75eb938 100644 --- a/projects/hipdnn/python/src/handle_bindings.cpp +++ b/projects/hipdnn/python/src/handle_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include @@ -27,7 +29,7 @@ class HandleWrapper public: HandleWrapper() { - auto error = createHipdnnHandle(_handle); + const auto error = createHipdnnHandle(_handle); if(error.is_bad()) { throw std::runtime_error("Failed to create hipdnn handle: " + error.get_message()); @@ -36,7 +38,8 @@ class HandleWrapper explicit HandleWrapper(uintptr_t streamPtr) { - auto error = createHipdnnHandle(_handle, reinterpret_cast(streamPtr)); + // NOLINTNEXTLINE(performance-no-int-to-ptr) + const auto error = createHipdnnHandle(_handle, reinterpret_cast(streamPtr)); if(error.is_bad()) { throw std::runtime_error("Failed to create hipdnn handle: " + error.get_message()); @@ -62,7 +65,8 @@ class HandleWrapper void setStream(uintptr_t streamPtr) { checkNotDestroyed(); - auto error = setHipdnnHandleStream(_handle, reinterpret_cast(streamPtr)); + // NOLINTNEXTLINE(performance-no-int-to-ptr) + const auto error = setHipdnnHandleStream(_handle, reinterpret_cast(streamPtr)); if(error.is_bad()) { throw std::runtime_error("Failed to set stream on hipdnn handle: " @@ -74,7 +78,7 @@ class HandleWrapper { checkNotDestroyed(); hipStream_t stream = nullptr; - auto error = getHipdnnHandleStream(_handle, &stream); + const auto error = getHipdnnHandleStream(_handle, &stream); if(error.is_bad()) { throw std::runtime_error("Failed to get stream from hipdnn handle: " diff --git a/projects/hipdnn/python/src/memory_bindings.cpp b/projects/hipdnn/python/src/memory_bindings.cpp index 24eb39b422f..37ea6cd5593 100644 --- a/projects/hipdnn/python/src/memory_bindings.cpp +++ b/projects/hipdnn/python/src/memory_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include @@ -24,7 +26,7 @@ class DeviceBuffer { if(_sizeBytes > 0) { - auto status = hipMalloc(&_devicePtr, _sizeBytes); + const auto status = hipMalloc(&_devicePtr, _sizeBytes); if(status != hipSuccess) { throw std::runtime_error("Failed to allocate device memory: " @@ -76,7 +78,7 @@ class DeviceBuffer { throw std::runtime_error("Invalid pointers for copy operation"); } - auto status = hipMemcpy(_devicePtr, hostPtr, _sizeBytes, hipMemcpyHostToDevice); + const auto status = hipMemcpy(_devicePtr, hostPtr, _sizeBytes, hipMemcpyHostToDevice); if(status != hipSuccess) { throw std::runtime_error("Failed to copy from host: " @@ -90,7 +92,7 @@ class DeviceBuffer { throw std::runtime_error("Invalid pointers for copy operation"); } - auto status = hipMemcpy(hostPtr, _devicePtr, _sizeBytes, hipMemcpyDeviceToHost); + const auto status = hipMemcpy(hostPtr, _devicePtr, _sizeBytes, hipMemcpyDeviceToHost); if(status != hipSuccess) { throw std::runtime_error("Failed to copy to host: " @@ -112,7 +114,7 @@ class DeviceBuffer { if(_devicePtr) { - auto status = hipMemset(_devicePtr, 0, _sizeBytes); + const auto status = hipMemset(_devicePtr, 0, _sizeBytes); if(status != hipSuccess) { throw std::runtime_error("Failed to zero memory: " @@ -130,7 +132,7 @@ void memoryBindings(nb::module_& m) "Create a device buffer with the given size in bytes") .def( "copy_from_host", - [](DeviceBuffer& self, nb::bytes data) { + [](DeviceBuffer& self, const nb::bytes& data) { if(data.size() != self.size()) { throw std::runtime_error("Data size (" + std::to_string(data.size()) @@ -162,8 +164,8 @@ void memoryBindings(nb::module_& m) // Utility function to get element size for different data types m.def( "get_dtype_size", - [](nb::object dtype) -> size_t { - std::string dtypeStr = nb::str(dtype).c_str(); + [](const nb::object& dtype) -> size_t { + const std::string dtypeStr = nb::str(dtype).c_str(); if(dtypeStr == " #include #include -#include #include #include namespace nb = nanobind; -// Forward declarations for binding functions -void graphBindings(nb::module_& m); -void tensorBindings(nb::module_& m); -void attributesBindings(nb::module_& m); -void typesBindings(nb::module_& m); -void handleBindings(nb::module_& m); -void memoryBindings(nb::module_& m); - NB_MODULE(hipdnn_frontend_python, m) { m.doc() = "Python bindings for the hipDNN frontend library"; @@ -43,7 +36,7 @@ NB_MODULE(hipdnn_frontend_python, m) cPaths.push_back(path.c_str()); } - hipdnnStatus_t status + const auto status = hipdnnSetEnginePluginPaths_ext(cPaths.size(), cPaths.data(), mode); if(status != HIPDNN_STATUS_SUCCESS) diff --git a/projects/hipdnn/python/src/tensor_bindings.cpp b/projects/hipdnn/python/src/tensor_bindings.cpp index 377d869ee61..5e170bb9e11 100644 --- a/projects/hipdnn/python/src/tensor_bindings.cpp +++ b/projects/hipdnn/python/src/tensor_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include diff --git a/projects/hipdnn/python/src/types_bindings.cpp b/projects/hipdnn/python/src/types_bindings.cpp index fed8f5d89a9..3b7f86596bb 100644 --- a/projects/hipdnn/python/src/types_bindings.cpp +++ b/projects/hipdnn/python/src/types_bindings.cpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#include "bindings.hpp" + #include #include #include From c7cf70b591ab20ba1cf74c499f3cc17c1ffe89f7 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 17:46:08 -0400 Subject: [PATCH 13/97] Run clang-format on Python binding source files Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/src/graph_bindings.cpp | 12 ++++-------- projects/hipdnn/python/src/module.cpp | 3 +-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/projects/hipdnn/python/src/graph_bindings.cpp b/projects/hipdnn/python/src/graph_bindings.cpp index 44df523d95e..19c2304703f 100644 --- a/projects/hipdnn/python/src/graph_bindings.cpp +++ b/projects/hipdnn/python/src/graph_bindings.cpp @@ -33,8 +33,7 @@ void graphBindings(nb::module_& m) [](graph::Graph& g, const nb::object& handle) { auto handlePtr = handle.attr("get")(); // NOLINTNEXTLINE(performance-no-int-to-ptr) - auto rawHandle - = reinterpret_cast(nb::cast(handlePtr)); + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); return g.build_operation_graph(rawHandle); }, nb::arg("handle"), @@ -81,8 +80,7 @@ void graphBindings(nb::module_& m) uintptr_t workspace) { auto handlePtr = handle.attr("get")(); // NOLINTNEXTLINE(performance-no-int-to-ptr) - auto rawHandle - = reinterpret_cast(nb::cast(handlePtr)); + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); std::unordered_map cppVariantPack; for(const auto& [key, value] : variantPack) @@ -166,8 +164,7 @@ void graphBindings(nb::module_& m) [](graph::Graph& g, const nb::object& handle, const std::string& jsonStr) { auto handlePtr = handle.attr("get")(); // NOLINTNEXTLINE(performance-no-int-to-ptr) - auto rawHandle - = reinterpret_cast(nb::cast(handlePtr)); + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); return g.deserialize(rawHandle, jsonStr); }, nb::arg("handle"), @@ -197,8 +194,7 @@ void graphBindings(nb::module_& m) [](graph::Graph& g, const nb::object& handle, const nb::bytes& data) { auto handlePtr = handle.attr("get")(); // NOLINTNEXTLINE(performance-no-int-to-ptr) - auto rawHandle - = reinterpret_cast(nb::cast(handlePtr)); + auto rawHandle = reinterpret_cast(nb::cast(handlePtr)); const auto* ptr = reinterpret_cast(data.c_str()); const std::vector vec(ptr, ptr + data.size()); return g.deserialize(rawHandle, vec); diff --git a/projects/hipdnn/python/src/module.cpp b/projects/hipdnn/python/src/module.cpp index b1d4a1ba010..08d4ddf44f4 100644 --- a/projects/hipdnn/python/src/module.cpp +++ b/projects/hipdnn/python/src/module.cpp @@ -36,8 +36,7 @@ NB_MODULE(hipdnn_frontend_python, m) cPaths.push_back(path.c_str()); } - const auto status - = hipdnnSetEnginePluginPaths_ext(cPaths.size(), cPaths.data(), mode); + const auto status = hipdnnSetEnginePluginPaths_ext(cPaths.size(), cPaths.data(), mode); if(status != HIPDNN_STATUS_SUCCESS) { From f955995049edc8354ebf94ea705341df5489b377 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 18:21:08 -0400 Subject: [PATCH 14/97] Add python3-dev to clang-tidy CI for Python bindings Co-Authored-By: Claude Opus 4 --- .github/workflows/clang-tidy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 4cb9a59c5cf..4179cc7b8e8 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -64,6 +64,7 @@ jobs: sudo apt-get install -y \ ninja-build \ python3-venv \ + python3-dev \ lsb-release wget \ software-properties-common \ gnupg From 9ad8a4cd4b92199524015fac48555119737d9685 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 13 May 2026 19:00:00 -0400 Subject: [PATCH 15/97] Fix clang-tidy errors in memory_bindings.cpp Co-Authored-By: Claude Opus 4 --- .../hipdnn/python/src/memory_bindings.cpp | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/projects/hipdnn/python/src/memory_bindings.cpp b/projects/hipdnn/python/src/memory_bindings.cpp index 37ea6cd5593..cbc319ab525 100644 --- a/projects/hipdnn/python/src/memory_bindings.cpp +++ b/projects/hipdnn/python/src/memory_bindings.cpp @@ -16,8 +16,8 @@ namespace nb = nanobind; class DeviceBuffer { private: - void* _devicePtr; - size_t _sizeBytes; + void* _devicePtr = nullptr; + size_t _sizeBytes = 0; public: DeviceBuffer(size_t sizeBytes) @@ -37,17 +37,15 @@ class DeviceBuffer ~DeviceBuffer() { - if(_devicePtr) + if(_devicePtr != nullptr) { - (void)hipFree(_devicePtr); // Explicitly ignore return value + (void)hipFree(_devicePtr); } } - // Disable copy DeviceBuffer(const DeviceBuffer&) = delete; DeviceBuffer& operator=(const DeviceBuffer&) = delete; - // Enable move DeviceBuffer(DeviceBuffer&& other) noexcept : _devicePtr(other._devicePtr) , _sizeBytes(other._sizeBytes) @@ -60,9 +58,9 @@ class DeviceBuffer { if(this != &other) { - if(_devicePtr) + if(_devicePtr != nullptr) { - (void)hipFree(_devicePtr); // Explicitly ignore return value + (void)hipFree(_devicePtr); } _devicePtr = other._devicePtr; _sizeBytes = other._sizeBytes; @@ -74,7 +72,7 @@ class DeviceBuffer void copyFromHost(const void* hostPtr) { - if(!_devicePtr || !hostPtr) + if(_devicePtr == nullptr || hostPtr == nullptr) { throw std::runtime_error("Invalid pointers for copy operation"); } @@ -88,7 +86,7 @@ class DeviceBuffer void copyToHost(void* hostPtr) { - if(!_devicePtr || !hostPtr) + if(_devicePtr == nullptr || hostPtr == nullptr) { throw std::runtime_error("Invalid pointers for copy operation"); } @@ -112,7 +110,7 @@ class DeviceBuffer // Fill with zeros void zeros() { - if(_devicePtr) + if(_devicePtr != nullptr) { const auto status = hipMemset(_devicePtr, 0, _sizeBytes); if(status != hipSuccess) @@ -171,30 +169,27 @@ void memoryBindings(nb::module_& m) { return sizeof(float); } - else if(dtypeStr == " Date: Thu, 14 May 2026 12:38:49 -0400 Subject: [PATCH 16/97] Bump nanobind to 2.12.0 and tsl-robin-map to 1.4.1 Co-Authored-By: Claude Opus 4 --- projects/hipdnn/CMakeLists.txt | 4 ++-- projects/hipdnn/python/pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index f89aed8b19b..e0c85cba9cd 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -8,8 +8,8 @@ set(HIPDNN_SPDLOG_VERSION "1.15.3" CACHE STRING "Version of spdlog to use") set(HIPDNN_FLATBUFFERS_VERSION "25.9.23" CACHE STRING "Version of flatbuffers to use") set(HIPDNN_NLOHMANN_JSON_VERSION "3.12.0" CACHE STRING "Version of nlohmann_json to use") set(HIPDNN_GTEST_VERSION "1.16.0" CACHE STRING "Version of googletest to use") -set(HIPDNN_NANOBIND_VERSION "2.4.0" CACHE STRING "Version of nanobind to use") -set(HIPDNN_TSL_ROBIN_MAP_VERSION "1.3.0" CACHE STRING "Version of tsl-robin-map to use") +set(HIPDNN_NANOBIND_VERSION "2.12.0" CACHE STRING "Version of nanobind to use") +set(HIPDNN_TSL_ROBIN_MAP_VERSION "1.4.1" CACHE STRING "Version of tsl-robin-map to use") option(HIPDNN_SKIP_JSON_LIB "Defines whether SDK public headers should not include nlohmann/json.hpp." OFF) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index bcb52827727..4b099936a9f 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -4,7 +4,7 @@ [build-system] requires = [ "scikit-build-core>=0.4.3", - "nanobind>=2.4.0", + "nanobind>=2.12.0", "cmake>=3.18", ] build-backend = "scikit_build_core.build" From 612a014160a51fb9a8521fa08cc69ad81be6c456 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 13:27:59 -0400 Subject: [PATCH 17/97] changes default version for nanobind and robin-map --- projects/hipdnn/cmake/Dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hipdnn/cmake/Dependencies.cmake b/projects/hipdnn/cmake/Dependencies.cmake index d2bc827165a..e6f91752b2e 100644 --- a/projects/hipdnn/cmake/Dependencies.cmake +++ b/projects/hipdnn/cmake/Dependencies.cmake @@ -249,7 +249,7 @@ endfunction() # Fetches tsl-robin-map function(_fetch_tsl-robin-map VERSION HASH) - _determine_git_tag(v v1.3.0) + _determine_git_tag(v v1.4.1) fetchcontent_declare( tsl-robin-map @@ -267,7 +267,7 @@ endfunction() # Fetches nanobind function(_fetch_nanobind VERSION HASH) - _determine_git_tag(v v2.4.0) + _determine_git_tag(v v2.12.0) set(NB_USE_SUBMODULE_DEPS OFF) From 378cbd8b26a51fa9ee5d1c47f3e665f2c92482a2 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 13:31:11 -0400 Subject: [PATCH 18/97] Fix clang-tidy errors in Python binding source files Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/src/memory_bindings.cpp | 3 +-- projects/hipdnn/python/src/module.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/projects/hipdnn/python/src/memory_bindings.cpp b/projects/hipdnn/python/src/memory_bindings.cpp index cbc319ab525..a0ee2289777 100644 --- a/projects/hipdnn/python/src/memory_bindings.cpp +++ b/projects/hipdnn/python/src/memory_bindings.cpp @@ -21,8 +21,7 @@ class DeviceBuffer public: DeviceBuffer(size_t sizeBytes) - : _devicePtr(nullptr) - , _sizeBytes(sizeBytes) + : _sizeBytes(sizeBytes) { if(_sizeBytes > 0) { diff --git a/projects/hipdnn/python/src/module.cpp b/projects/hipdnn/python/src/module.cpp index 08d4ddf44f4..f50beca139b 100644 --- a/projects/hipdnn/python/src/module.cpp +++ b/projects/hipdnn/python/src/module.cpp @@ -11,7 +11,7 @@ namespace nb = nanobind; -NB_MODULE(hipdnn_frontend_python, m) +NB_MODULE(hipdnn_frontend_python, m) // NOLINT(modernize-avoid-c-arrays) { m.doc() = "Python bindings for the hipDNN frontend library"; From f7aa3e73464d537789e4cc8f0a149b7d7080b597 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 13:32:33 -0400 Subject: [PATCH 19/97] Add missing string include for Windows build Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/src/memory_bindings.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/hipdnn/python/src/memory_bindings.cpp b/projects/hipdnn/python/src/memory_bindings.cpp index a0ee2289777..ec51dc1141c 100644 --- a/projects/hipdnn/python/src/memory_bindings.cpp +++ b/projects/hipdnn/python/src/memory_bindings.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace nb = nanobind; From eb991f48de53865e109dc9054e199dcde1a6c4de Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 16:14:48 -0400 Subject: [PATCH 20/97] Use Development.Module instead of full Development for Python bindings Only the module development component is needed for nanobind extension modules, not the full Development (which also requires Development.Embed and libpython). This matches the pattern used by other Python bindings in the repo (stinkytofu, origami, rocisa). Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 0254a1fa5da..aa28a897593 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -11,7 +11,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Find Python and nanobind -find_package(Python COMPONENTS Interpreter Development REQUIRED) +find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) From 3d0d08b828c469ae74aff9dce4a5cc19cd4a3da4 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 16:43:33 -0400 Subject: [PATCH 21/97] Fix standalone wheel build by including Dependencies.cmake and forwarding package locations When pip wheel re-invokes CMake on python/CMakeLists.txt standalone, hipdnn_add_dependency is not available. Include Dependencies.cmake as a fallback. Forward nanobind_DIR, tsl-robin-map_DIR, and version variables so find_package() picks up already-built packages from TheRock without re-downloading. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index aa28a897593..2884b4c449f 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -13,6 +13,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Find Python and nanobind find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) +if(NOT COMMAND hipdnn_add_dependency) + include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Dependencies.cmake") +endif() + hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) @@ -76,6 +80,10 @@ else() -w \"\${_wheel_dir}\" \"${CMAKE_CURRENT_SOURCE_DIR}\" -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" + -C \"cmake.define.HIPDNN_NANOBIND_VERSION=${HIPDNN_NANOBIND_VERSION}\" + -C \"cmake.define.HIPDNN_TSL_ROBIN_MAP_VERSION=${HIPDNN_TSL_ROBIN_MAP_VERSION}\" + -C \"cmake.define.nanobind_DIR=${nanobind_DIR}\" + -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" RESULT_VARIABLE _result ) if(_result) From d39ac7293ae7e4eede0bf7b3442dc0cab5c10f63 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 16:50:08 -0400 Subject: [PATCH 22/97] Remove redundant version variables from pip wheel command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The version variables are unnecessary — Dependencies.cmake has matching hardcoded defaults, and when packages are found via nanobind_DIR / tsl-robin-map_DIR the version is not used. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 2884b4c449f..358db5ff43e 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -80,8 +80,6 @@ else() -w \"\${_wheel_dir}\" \"${CMAKE_CURRENT_SOURCE_DIR}\" -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" - -C \"cmake.define.HIPDNN_NANOBIND_VERSION=${HIPDNN_NANOBIND_VERSION}\" - -C \"cmake.define.HIPDNN_TSL_ROBIN_MAP_VERSION=${HIPDNN_TSL_ROBIN_MAP_VERSION}\" -C \"cmake.define.nanobind_DIR=${nanobind_DIR}\" -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" RESULT_VARIABLE _result From 6c4894b3e735bc8808620253da8f5d47529b3b03 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 16:52:19 -0400 Subject: [PATCH 23/97] Add pytest test suite for Python bindings Add integration tests for conv_fprop, conv_dgrad, conv_wgrad, and matmul that mirror the existing C++ samples. Add unit-style API tests for Tensor, Graph, and DeviceBuffer. Convert the existing engine_id_overloads test to pytest format. Configure pytest in pyproject.toml with gpu and integration markers. Co-Authored-By: Claude Opus 4 --- .../python/hipdnn_frontend/test/__init__.py | 2 + .../python/hipdnn_frontend/test/conftest.py | 105 +++++++++++++++ .../hipdnn_frontend/test/test_conv_dgrad.py | 104 +++++++++++++++ .../hipdnn_frontend/test/test_conv_fprop.py | 124 ++++++++++++++++++ .../hipdnn_frontend/test/test_conv_wgrad.py | 104 +++++++++++++++ .../test/test_device_buffer.py | 55 ++++++++ .../test/test_engine_id_overloads.py | 77 +---------- .../hipdnn_frontend/test/test_graph_api.py | 76 +++++++++++ .../hipdnn_frontend/test/test_matmul.py | 53 ++++++++ .../hipdnn_frontend/test/test_tensor_api.py | 72 ++++++++++ projects/hipdnn/python/pyproject.toml | 8 ++ 11 files changed, 708 insertions(+), 72 deletions(-) create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/__init__.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/conftest.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_device_buffer.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_matmul.py create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py diff --git a/projects/hipdnn/python/hipdnn_frontend/test/__init__.py b/projects/hipdnn/python/hipdnn_frontend/test/__init__.py new file mode 100644 index 00000000000..c2a8ff22361 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/__init__.py @@ -0,0 +1,2 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT diff --git a/projects/hipdnn/python/hipdnn_frontend/test/conftest.py b/projects/hipdnn/python/hipdnn_frontend/test/conftest.py new file mode 100644 index 00000000000..9d46ca71e13 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/conftest.py @@ -0,0 +1,105 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Shared pytest fixtures for hipDNN Python binding tests.""" + +import numpy as np +import pytest + +import hipdnn_frontend as hipdnn + + +@pytest.fixture() +def handle(): + """Create a hipDNN handle for GPU operations.""" + return hipdnn.create_handle() + + +@pytest.fixture() +def graph(): + """Create a hipDNN Graph configured with FLOAT data types.""" + g = hipdnn.Graph() + g.set_io_data_type(hipdnn.DataType.FLOAT) + g.set_intermediate_data_type(hipdnn.DataType.FLOAT) + g.set_compute_data_type(hipdnn.DataType.FLOAT) + return g + + +def build_conv_fprop_graph( + graph, + n=16, + c=16, + h=16, + w=16, + k=16, + r=3, + s=3, + stride=1, + pad=1, + dilation=1, +): + """Build a complete convolution forward propagation graph. + + Returns: + Tuple of (graph, x_tensor, weight_tensor, y_tensor, out_h, out_w). + """ + out_h = (h + 2 * pad - dilation * (r - 1) - 1) // stride + 1 + out_w = (w + 2 * pad - dilation * (s - 1) - 1) // stride + 1 + + graph.set_name("conv_fprop_test") + + x = hipdnn.Tensor.create([n, c, h, w], hipdnn.DataType.FLOAT) + x.set_name("input_x") + + weight = hipdnn.Tensor.create([k, c, r, s], hipdnn.DataType.FLOAT) + weight.set_name("weight") + + conv_attrs = hipdnn.ConvFpropAttributes() + conv_attrs.set_name("conv_fprop_node") + conv_attrs.set_padding([pad, pad]) + conv_attrs.set_stride([stride, stride]) + conv_attrs.set_dilation([dilation, dilation]) + + y = graph.conv_fprop(x, weight, conv_attrs) + y.set_name("output_y") + y.set_output(True) + + return graph, x, weight, y, out_h, out_w + + +def execute_graph(graph, handle, tensor_uid_to_data): + """Execute a graph with the given tensor data. + + Args: + graph: A fully-built hipDNN graph (validated, built, plans created). + handle: A hipDNN handle. + tensor_uid_to_data: Dict mapping tensor UIDs to numpy arrays. + Output tensors should have zero-initialized arrays. + + Returns: + Dict mapping tensor UIDs to result numpy arrays (copied from device). + """ + buffers = {} + variant_pack = {} + for uid, data in tensor_uid_to_data.items(): + buf = hipdnn.DeviceBuffer(data.nbytes) + buf.copy_from_host(data.tobytes()) + buffers[uid] = (buf, data.shape, data.dtype) + variant_pack[uid] = buf.ptr() + + workspace_size = graph.get_workspace_size() + workspace_buffer = None + workspace_ptr = 0 + if workspace_size > 0: + workspace_buffer = hipdnn.DeviceBuffer(workspace_size) + workspace_ptr = workspace_buffer.ptr() + + exec_result = graph.execute(handle, variant_pack, workspace_ptr) + assert exec_result.is_good(), f"Graph execution failed: {exec_result.get_message()}" + + results = {} + for uid, (buf, shape, dtype) in buffers.items(): + host_bytes = buf.copy_to_host() + results[uid] = np.frombuffer(host_bytes, dtype=dtype).reshape(shape) + + return results diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py new file mode 100644 index 00000000000..d962064d0e5 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py @@ -0,0 +1,104 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Integration tests for convolution backward data gradient.""" + +import numpy as np +import pytest + +import hipdnn_frontend as hipdnn + +from conftest import execute_graph + +# Dimensions used across tests +N, C, H, W = 16, 16, 16, 16 +K, R, S = 16, 3, 3 +STRIDE, PAD, DIL = 1, 1, 1 +OUT_H = (H + 2 * PAD - DIL * (R - 1) - 1) // STRIDE + 1 +OUT_W = (W + 2 * PAD - DIL * (S - 1) - 1) // STRIDE + 1 + + +def _build_conv_dgrad_graph(graph): + """Build a conv_dgrad graph returning (graph, dy, weight, dx).""" + graph.set_name("conv_dgrad_test") + + dy = hipdnn.Tensor.create([N, K, OUT_H, OUT_W], hipdnn.DataType.FLOAT) + dy.set_name("output_gradient_dy") + + weight = hipdnn.Tensor.create([K, C, R, S], hipdnn.DataType.FLOAT) + weight.set_name("weight") + + conv_attrs = hipdnn.ConvDgradAttributes() + conv_attrs.set_name("conv_dgrad_node") + conv_attrs.set_pre_padding([PAD, PAD]) + conv_attrs.set_post_padding([PAD, PAD]) + conv_attrs.set_stride([STRIDE, STRIDE]) + conv_attrs.set_dilation([DIL, DIL]) + + dx = graph.conv_dgrad(dy, weight, conv_attrs) + dx.set_name("input_gradient_dx") + dx.set_output(True) + + return graph, dy, weight, dx + + +@pytest.mark.gpu +@pytest.mark.integration +class TestConvDgrad: + """Tests for convolution backward data gradient end-to-end pipeline.""" + + def test_graph_validates_successfully(self, graph): + """Build a conv_dgrad graph and verify validation passes.""" + graph, dy, weight, dx = _build_conv_dgrad_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + def test_operation_graph_builds(self, graph, handle): + """Build a conv_dgrad operation graph with backend handle.""" + graph, dy, weight, dx = _build_conv_dgrad_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + result = graph.build_operation_graph(handle) + assert result.is_good(), f"Build operation graph failed: {result.get_message()}" + + def test_execution_plans_created(self, graph, handle): + """Build execution plans for conv_dgrad.""" + graph, dy, weight, dx = _build_conv_dgrad_graph(graph) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + def test_execution_produces_nonzero_output(self, graph, handle): + """Full end-to-end conv_dgrad: execute and verify non-zero output.""" + graph, dy, weight, dx = _build_conv_dgrad_graph(graph) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + dy_data = np.random.uniform( + 0.0, + 1.0, + [N, K, OUT_H, OUT_W], + ).astype(np.float32) + w_data = np.random.uniform(0.0, 1.0, [K, C, R, S]).astype(np.float32) + dx_data = np.zeros([N, C, H, W], dtype=np.float32) + + tensor_data = { + dy.get_uid(): dy_data, + weight.get_uid(): w_data, + dx.get_uid(): dx_data, + } + + results = execute_graph(graph, handle, tensor_data) + dx_result = results[dx.get_uid()] + + assert not np.all(dx_result == 0), "Conv dgrad output is all zeros" diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py new file mode 100644 index 00000000000..f3a111cbb5f --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py @@ -0,0 +1,124 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Integration tests for convolution forward propagation.""" + +import numpy as np +import pytest + +import hipdnn_frontend as hipdnn + +from conftest import build_conv_fprop_graph, execute_graph + +# Dimensions used across tests +N, C, H, W = 16, 16, 16, 16 +K, R, S = 16, 3, 3 +STRIDE, PAD, DIL = 1, 1, 1 +OUT_H = (H + 2 * PAD - DIL * (R - 1) - 1) // STRIDE + 1 +OUT_W = (W + 2 * PAD - DIL * (S - 1) - 1) // STRIDE + 1 + + +@pytest.mark.gpu +@pytest.mark.integration +class TestConvFprop: + """Tests for convolution forward propagation end-to-end pipeline.""" + + def test_graph_validates_successfully(self, graph): + """Build a conv_fprop graph and verify validation passes.""" + graph, x, weight, y, out_h, out_w = build_conv_fprop_graph( + graph, + n=N, + c=C, + h=H, + w=W, + k=K, + r=R, + s=S, + stride=STRIDE, + pad=PAD, + dilation=DIL, + ) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + def test_operation_graph_builds(self, graph, handle): + """Build a conv_fprop operation graph with backend handle.""" + graph, x, weight, y, out_h, out_w = build_conv_fprop_graph( + graph, + n=N, + c=C, + h=H, + w=W, + k=K, + r=R, + s=S, + stride=STRIDE, + pad=PAD, + dilation=DIL, + ) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + result = graph.build_operation_graph(handle) + assert result.is_good(), f"Build operation graph failed: {result.get_message()}" + + def test_execution_plans_created(self, graph, handle): + """Build execution plans for conv_fprop.""" + graph, x, weight, y, out_h, out_w = build_conv_fprop_graph( + graph, + n=N, + c=C, + h=H, + w=W, + k=K, + r=R, + s=S, + stride=STRIDE, + pad=PAD, + dilation=DIL, + ) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + def test_execution_produces_nonzero_output(self, graph, handle): + """Full end-to-end conv_fprop: execute and verify non-zero output.""" + graph, x, weight, y, out_h, out_w = build_conv_fprop_graph( + graph, + n=N, + c=C, + h=H, + w=W, + k=K, + r=R, + s=S, + stride=STRIDE, + pad=PAD, + dilation=DIL, + ) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + x_data = np.random.uniform(0.0, 1.0, [N, C, H, W]).astype(np.float32) + w_data = np.random.uniform(0.0, 1.0, [K, C, R, S]).astype(np.float32) + y_data = np.zeros([N, K, out_h, out_w], dtype=np.float32) + + tensor_data = { + x.get_uid(): x_data, + weight.get_uid(): w_data, + y.get_uid(): y_data, + } + + results = execute_graph(graph, handle, tensor_data) + y_result = results[y.get_uid()] + + assert not np.all(y_result == 0), "Conv fprop output is all zeros" diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py new file mode 100644 index 00000000000..03ea6721401 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py @@ -0,0 +1,104 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Integration tests for convolution backward weight gradient.""" + +import numpy as np +import pytest + +import hipdnn_frontend as hipdnn + +from conftest import execute_graph + +# Dimensions used across tests +N, C, H, W = 16, 16, 16, 16 +K, R, S = 16, 3, 3 +STRIDE, PAD, DIL = 1, 1, 1 +OUT_H = (H + 2 * PAD - DIL * (R - 1) - 1) // STRIDE + 1 +OUT_W = (W + 2 * PAD - DIL * (S - 1) - 1) // STRIDE + 1 + + +def _build_conv_wgrad_graph(graph): + """Build a conv_wgrad graph returning (graph, dy, x, dw).""" + graph.set_name("conv_wgrad_test") + + dy = hipdnn.Tensor.create([N, K, OUT_H, OUT_W], hipdnn.DataType.FLOAT) + dy.set_name("output_gradient_dy") + + x = hipdnn.Tensor.create([N, C, H, W], hipdnn.DataType.FLOAT) + x.set_name("input_x") + + conv_attrs = hipdnn.ConvWgradAttributes() + conv_attrs.set_name("conv_wgrad_node") + conv_attrs.set_pre_padding([PAD, PAD]) + conv_attrs.set_post_padding([PAD, PAD]) + conv_attrs.set_stride([STRIDE, STRIDE]) + conv_attrs.set_dilation([DIL, DIL]) + + dw = graph.conv_wgrad(dy, x, conv_attrs) + dw.set_name("weight_gradient_dw") + dw.set_output(True) + + return graph, dy, x, dw + + +@pytest.mark.gpu +@pytest.mark.integration +class TestConvWgrad: + """Tests for convolution backward weight gradient end-to-end pipeline.""" + + def test_graph_validates_successfully(self, graph): + """Build a conv_wgrad graph and verify validation passes.""" + graph, dy, x, dw = _build_conv_wgrad_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + def test_operation_graph_builds(self, graph, handle): + """Build a conv_wgrad operation graph with backend handle.""" + graph, dy, x, dw = _build_conv_wgrad_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + result = graph.build_operation_graph(handle) + assert result.is_good(), f"Build operation graph failed: {result.get_message()}" + + def test_execution_plans_created(self, graph, handle): + """Build execution plans for conv_wgrad.""" + graph, dy, x, dw = _build_conv_wgrad_graph(graph) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + def test_execution_produces_nonzero_output(self, graph, handle): + """Full end-to-end conv_wgrad: execute and verify non-zero output.""" + graph, dy, x, dw = _build_conv_wgrad_graph(graph) + + assert graph.validate().is_good() + assert graph.build_operation_graph(handle).is_good() + assert graph.create_execution_plans().is_good() + assert graph.check_support().is_good() + assert graph.build_plans().is_good() + + dy_data = np.random.uniform( + 0.0, + 1.0, + [N, K, OUT_H, OUT_W], + ).astype(np.float32) + x_data = np.random.uniform(0.0, 1.0, [N, C, H, W]).astype(np.float32) + dw_data = np.zeros([K, C, R, S], dtype=np.float32) + + tensor_data = { + dy.get_uid(): dy_data, + x.get_uid(): x_data, + dw.get_uid(): dw_data, + } + + results = execute_graph(graph, handle, tensor_data) + dw_result = results[dw.get_uid()] + + assert not np.all(dw_result == 0), "Conv wgrad output is all zeros" diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_device_buffer.py b/projects/hipdnn/python/hipdnn_frontend/test/test_device_buffer.py new file mode 100644 index 00000000000..6b504691031 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_device_buffer.py @@ -0,0 +1,55 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""GPU tests for DeviceBuffer allocation and data transfer.""" + +import numpy as np +import pytest + +import hipdnn_frontend as hipdnn + + +@pytest.mark.gpu +class TestDeviceBuffer: + """Tests for DeviceBuffer creation and host-device data transfer.""" + + def test_buffer_creation(self): + """DeviceBuffer can be created with a given byte size.""" + buf = hipdnn.DeviceBuffer(1024) + assert buf is not None + + def test_buffer_ptr_nonzero(self): + """DeviceBuffer.ptr() returns a non-zero device pointer.""" + buf = hipdnn.DeviceBuffer(256) + assert buf.ptr() != 0 + + def test_buffer_size(self): + """DeviceBuffer.size() returns the requested byte count.""" + buf = hipdnn.DeviceBuffer(512) + assert buf.size() == 512 + + def test_buffer_host_roundtrip(self): + """Data copied to device and back matches the original.""" + data = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) + buf = hipdnn.DeviceBuffer(data.nbytes) + + buf.copy_from_host(data.tobytes()) + result_bytes = buf.copy_to_host() + result = np.frombuffer(result_bytes, dtype=np.float32) + + np.testing.assert_array_equal(result, data) + + def test_buffer_zeros(self): + """DeviceBuffer.zeros() fills the buffer with zeros.""" + size = 64 * 4 # 64 float32 values + buf = hipdnn.DeviceBuffer(size) + + # Fill with non-zero first to confirm zeros() works + data = np.ones(64, dtype=np.float32) + buf.copy_from_host(data.tobytes()) + + buf.zeros() + + result_bytes = buf.copy_to_host() + result = np.frombuffer(result_bytes, dtype=np.float32) + np.testing.assert_array_equal(result, np.zeros(64, dtype=np.float32)) diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_engine_id_overloads.py b/projects/hipdnn/python/hipdnn_frontend/test/test_engine_id_overloads.py index 04c971d8592..f3a23aa9e25 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_engine_id_overloads.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_engine_id_overloads.py @@ -1,119 +1,90 @@ -#!/usr/bin/env python3 -""" -Test script for set_preferred_engine_id_ext() and get_preferred_engine_id_ext() overloads. +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Tests for set_preferred_engine_id_ext() and get_preferred_engine_id_ext() overloads. -This script verifies that: +Verifies that: 1. Setting by int64 works 2. Setting by string works 3. Setting to None clears the preference 4. Setting to empty string clears the preference 5. Getting the preference returns the correct value 6. Method chaining works - -USAGE: - # From the python/hipdnn_frontend/test directory, after building and installing: - python test_engine_id_overloads.py """ -import sys -import os - import hipdnn_frontend as fe def test_set_by_int(): """Test setting preferred engine ID by integer.""" - print("Test 1: Set by int...") graph = fe.Graph() - # Set by int graph.set_preferred_engine_id_ext(12345) - # Verify engine_id = graph.get_preferred_engine_id_ext() assert engine_id is not None, "Engine ID should be set" assert engine_id == 12345, f"Expected 12345, got {engine_id}" - print(" OK Set by int works") def test_set_by_string(): """Test setting preferred engine ID by string.""" - print("Test 2: Set by string...") graph = fe.Graph() - # Set by string test_engine_name = "TEST_ENGINE_NAME" graph.set_preferred_engine_id_ext(test_engine_name) - # Verify it was set (we can't easily verify the exact ID without engineNameToId in Python) engine_id = graph.get_preferred_engine_id_ext() assert engine_id is not None, "Engine ID should be set" assert isinstance(engine_id, int), f"Engine ID should be int, got {type(engine_id)}" - print(f" OK Set by string works (ID: {engine_id})") def test_clear_with_none(): """Test clearing preference with None.""" - print("Test 3: Clear with None...") graph = fe.Graph() - # Set then clear graph.set_preferred_engine_id_ext(12345) assert graph.get_preferred_engine_id_ext() is not None, "Should be set" graph.set_preferred_engine_id_ext(None) assert graph.get_preferred_engine_id_ext() is None, "Should be cleared" - print(" OK Clear with None works") def test_clear_with_empty_string(): """Test clearing preference with empty string.""" - print("Test 4: Clear with empty string...") graph = fe.Graph() - # Set then clear graph.set_preferred_engine_id_ext("TEST_ENGINE") assert graph.get_preferred_engine_id_ext() is not None, "Should be set" graph.set_preferred_engine_id_ext("") assert graph.get_preferred_engine_id_ext() is None, "Should be cleared" - print(" OK Clear with empty string works") def test_overload_interaction(): """Test that overloads can override each other.""" - print("Test 5: Overload interaction...") graph = fe.Graph() - # Set by string graph.set_preferred_engine_id_ext("ENGINE_A") id_from_string = graph.get_preferred_engine_id_ext() - # Override with int graph.set_preferred_engine_id_ext(999) id_from_int = graph.get_preferred_engine_id_ext() assert id_from_int == 999, f"Expected 999, got {id_from_int}" assert id_from_int != id_from_string, "IDs should be different" - print(" OK Overload interaction works") def test_method_chaining(): """Test that set_preferred_engine_id_ext() supports method chaining.""" - print("Test 6: Method chaining...") graph = fe.Graph() - # Chain multiple setters result = ( graph.set_name("test_graph") .set_preferred_engine_id_ext(12345) .set_compute_data_type(fe.DataType.FLOAT) ) - # Verify chaining returns the graph assert result is graph, "Chaining should return the same graph object" - - # Verify values were set assert ( graph.get_name() == "test_graph" ), f"Expected name 'test_graph', got '{graph.get_name()}'" @@ -122,15 +93,12 @@ def test_method_chaining(): assert ( graph.get_compute_data_type() == fe.DataType.FLOAT ), f"Expected FLOAT, got {graph.get_compute_data_type()}" - print(" OK Method chaining works") def test_chaining_with_string_overload(): """Test chaining with string overload.""" - print("Test 7: Chaining with string overload...") graph = fe.Graph() - # Chain with string overload result = ( graph.set_name("test_graph") .set_preferred_engine_id_ext("MY_ENGINE") @@ -139,38 +107,3 @@ def test_chaining_with_string_overload(): assert result is graph, "Chaining should return the same graph object" assert graph.get_preferred_engine_id_ext() is not None - print(" OK Chaining with string overload works") - - -def main(): - """Run all tests.""" - print("=" * 60) - print("Testing set_preferred_engine_id_ext() overloads") - print("=" * 60) - - try: - test_set_by_int() - test_set_by_string() - test_clear_with_none() - test_clear_with_empty_string() - test_overload_interaction() - test_method_chaining() - test_chaining_with_string_overload() - - print("=" * 60) - print("OK All tests passed!") - print("=" * 60) - return 0 - except AssertionError as e: - print(f"\nX Test failed: {e}") - return 1 - except Exception as e: - print(f"\nX Unexpected error: {e}") - import traceback - - traceback.print_exc() - return 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py b/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py new file mode 100644 index 00000000000..a64f9f8704d --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py @@ -0,0 +1,76 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""API tests for Graph configuration (mostly no GPU required).""" + +import hipdnn_frontend as hipdnn + + +class TestGraphConfiguration: + """Tests for Graph setter and getter methods.""" + + def test_graph_set_name(self): + """set_name() / get_name() roundtrip.""" + g = hipdnn.Graph() + g.set_name("test_graph") + assert g.get_name() == "test_graph" + + def test_graph_set_compute_data_type(self): + """set_compute_data_type() / get_compute_data_type() roundtrip.""" + g = hipdnn.Graph() + g.set_compute_data_type(hipdnn.DataType.FLOAT) + assert g.get_compute_data_type() == hipdnn.DataType.FLOAT + + def test_graph_set_io_data_type(self): + """set_io_data_type() / get_io_data_type() roundtrip.""" + g = hipdnn.Graph() + g.set_io_data_type(hipdnn.DataType.FLOAT) + assert g.get_io_data_type() == hipdnn.DataType.FLOAT + + def test_graph_set_intermediate_data_type(self): + """set_intermediate_data_type() / get_intermediate_data_type() roundtrip.""" + g = hipdnn.Graph() + g.set_intermediate_data_type(hipdnn.DataType.FLOAT) + assert g.get_intermediate_data_type() == hipdnn.DataType.FLOAT + + def test_graph_method_chaining(self): + """Chained setter calls return the same graph object.""" + g = hipdnn.Graph() + result = ( + g.set_name("chained_graph") + .set_io_data_type(hipdnn.DataType.FLOAT) + .set_compute_data_type(hipdnn.DataType.FLOAT) + .set_intermediate_data_type(hipdnn.DataType.FLOAT) + ) + assert result is g + assert g.get_name() == "chained_graph" + + +class TestGraphTensorCreation: + """Tests for creating tensors via the Graph API.""" + + def test_graph_tensor_creation(self): + """graph.tensor() creates a shared tensor from attributes.""" + g = hipdnn.Graph() + + attrs = hipdnn.Tensor() + attrs.set_dim([2, 3, 4]) + attrs.set_data_type(hipdnn.DataType.FLOAT) + attrs.set_stride([12, 4, 1]) + + t = g.tensor(attrs) + assert t is not None + assert t.get_dim() == [2, 3, 4] + assert t.get_data_type() == hipdnn.DataType.FLOAT + + def test_graph_tensor_like(self): + """Graph.tensor_like() creates a tensor with matching dims but new uid.""" + original = hipdnn.Tensor.create([4, 8, 16], hipdnn.DataType.FLOAT) + original.set_name("original") + + copy = hipdnn.Graph.tensor_like(original) + assert copy is not None + assert copy.get_dim() == original.get_dim() + assert copy.get_data_type() == original.get_data_type() + # tensor_like clears the uid, so has_uid should be False + assert not copy.has_uid() diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_matmul.py b/projects/hipdnn/python/hipdnn_frontend/test/test_matmul.py new file mode 100644 index 00000000000..cc81b591309 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_matmul.py @@ -0,0 +1,53 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Integration tests for matrix multiplication.""" + +import pytest + +import hipdnn_frontend as hipdnn + +# Dimensions: A [M, K], B [K, N] -> C [M, N] +M, K, N = 4, 3, 5 + + +def _build_matmul_graph(graph): + """Build a matmul graph returning (graph, a, b, c).""" + graph.set_name("matmul_test") + + a = hipdnn.Tensor.create([M, K], hipdnn.DataType.FLOAT) + a.set_name("A") + + b = hipdnn.Tensor.create([K, N], hipdnn.DataType.FLOAT) + b.set_name("B") + + attrs = hipdnn.MatmulAttributes() + attrs.set_name("matmul_node") + + c = graph.matmul(a, b, attrs) + c.set_name("C") + c.set_output(True) + + return graph, a, b, c + + +@pytest.mark.gpu +class TestMatmul: + """Tests for matrix multiplication graph building.""" + + def test_graph_validates(self, graph): + """Create a matmul graph and verify validation passes.""" + graph, a, b, c = _build_matmul_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + def test_operation_graph_builds(self, graph, handle): + """Validate and build matmul operation graph.""" + graph, a, b, c = _build_matmul_graph(graph) + + result = graph.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" + + result = graph.build_operation_graph(handle) + assert result.is_good(), f"Build operation graph failed: {result.get_message()}" diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py b/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py new file mode 100644 index 00000000000..150e6e91c84 --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py @@ -0,0 +1,72 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""API tests for Tensor creation and configuration (no GPU required).""" + +import hipdnn_frontend as hipdnn + + +class TestTensorCreate: + """Tests for Tensor.create() and basic property accessors.""" + + def test_tensor_create_sets_dimensions(self): + """Tensor dimensions match the shape passed to create().""" + dims = [2, 3, 4, 5] + t = hipdnn.Tensor.create(dims, hipdnn.DataType.FLOAT) + assert t.get_dim() == dims + + def test_tensor_create_sets_data_type(self): + """Tensor data type matches the type passed to create().""" + t = hipdnn.Tensor.create([1, 2, 3], hipdnn.DataType.FLOAT) + assert t.get_data_type() == hipdnn.DataType.FLOAT + + def test_tensor_uid_is_assigned(self): + """Each tensor receives a unique auto-assigned uid.""" + t1 = hipdnn.Tensor.create([1, 2], hipdnn.DataType.FLOAT) + t2 = hipdnn.Tensor.create([3, 4], hipdnn.DataType.FLOAT) + assert t1.get_uid() != t2.get_uid() + + +class TestTensorSetters: + """Tests for Tensor setter methods.""" + + def test_tensor_set_name(self): + """set_name() / get_name() roundtrip.""" + t = hipdnn.Tensor.create([1, 2], hipdnn.DataType.FLOAT) + t.set_name("my_tensor") + assert t.get_name() == "my_tensor" + + def test_tensor_set_stride(self): + """set_stride() / get_stride() roundtrip.""" + t = hipdnn.Tensor.create([2, 3, 4], hipdnn.DataType.FLOAT) + strides = [12, 4, 1] + t.set_stride(strides) + assert t.get_stride() == strides + + def test_tensor_set_output(self): + """set_output() marks tensor as a graph output and supports chaining.""" + t = hipdnn.Tensor.create([1, 2], hipdnn.DataType.FLOAT) + result = t.set_output(True) + # set_output returns self for method chaining + assert result is t + + def test_tensor_set_is_virtual(self): + """set_is_virtual() marks tensor as virtual (intermediate).""" + t = hipdnn.Tensor.create([1, 2], hipdnn.DataType.FLOAT) + t.set_is_virtual(True) + assert t.get_is_virtual() is True + + def test_tensor_method_chaining(self): + """Chained setter calls return the same tensor object.""" + t = hipdnn.Tensor.create([2, 3], hipdnn.DataType.FLOAT) + result = t.set_name("chained").set_uid(42).set_data_type(hipdnn.DataType.FLOAT) + assert result is t + assert t.get_name() == "chained" + assert t.get_uid() == 42 + + def test_tensor_validate(self): + """A properly configured tensor passes validation.""" + t = hipdnn.Tensor.create([2, 3, 4], hipdnn.DataType.FLOAT) + t.set_name("valid_tensor") + result = t.validate() + assert result.is_good(), f"Validation failed: {result.get_message()}" diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 4b099936a9f..e90db030da5 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -67,6 +67,14 @@ hipdnn_frontend_DIR = "${hipdnn_frontend_DIR}" hipdnn_backend_DIR = "${hipdnn_backend_DIR}" hipdnn_data_sdk_DIR = "${hipdnn_data_sdk_DIR}" +[tool.pytest.ini_options] +testpaths = ["hipdnn_frontend/test"] +addopts = "-v --tb=short" +markers = [ + "gpu: marks tests requiring GPU hardware", + "integration: marks integration tests that exercise full pipeline", +] + [tool.cibuildwheel] # Skip building for certain Python versions or platforms if needed skip = ["pp*", "cp36-*", "cp37-*"] From 503ad67faccfe0bda73417ae6e962c110d61f6be Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 14 May 2026 18:46:47 -0400 Subject: [PATCH 24/97] Address review feedback: forward FetchContent source dirs and remove hardcoded compiler paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward FETCHCONTENT_SOURCE_DIR for nanobind and tsl-robin-map so the standalone wheel build reuses already-downloaded sources when the parent used FetchContent (where _DIR variables are empty). Remove hardcoded /opt/rocm/llvm/bin/clang compiler paths from pyproject.toml — let the environment or -C flags control the compiler. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 2 ++ projects/hipdnn/python/pyproject.toml | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 358db5ff43e..b6ccd6af793 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -82,6 +82,8 @@ else() -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" -C \"cmake.define.nanobind_DIR=${nanobind_DIR}\" -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" + -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_SOURCE_DIR}\" + -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_TSL-ROBIN-MAP=${tsl-robin-map_SOURCE_DIR}\" RESULT_VARIABLE _result ) if(_result) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 4b099936a9f..107df61ed8d 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -57,10 +57,7 @@ wheel.install-dir = "hipdnn_frontend" wheel.packages = ["hipdnn_frontend"] [tool.scikit-build.cmake.define] -# Pass any additional CMake definitions if needed CMAKE_PREFIX_PATH = "${CMAKE_PREFIX_PATH}" -CMAKE_C_COMPILER = "/opt/rocm/llvm/bin/clang" -CMAKE_CXX_COMPILER = "/opt/rocm/llvm/bin/clang++" hip_DIR = "${hip_DIR}" hipdnn_frontend_DIR = "${hipdnn_frontend_DIR}" From 30c3da02037949697a81c9e2510cb9bb9d815903 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 11:15:12 -0400 Subject: [PATCH 25/97] Forward hip_DIR to standalone wheel build to fix TheRock CI The pip wheel subprocess could not find the HIP package because hip_DIR was not forwarded from the parent CMake configuration. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index b6ccd6af793..4839a21f359 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -84,6 +84,7 @@ else() -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_SOURCE_DIR}\" -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_TSL-ROBIN-MAP=${tsl-robin-map_SOURCE_DIR}\" + -C \"cmake.define.hip_DIR=${hip_DIR}\" RESULT_VARIABLE _result ) if(_result) From 841dd972a35a56839f1368a0ac6146b85cfab374 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 13:35:00 -0400 Subject: [PATCH 26/97] Forward HIP transitive dependencies to standalone wheel build The pip wheel subprocess finds HIP via hip_DIR but hip-config-amd.cmake calls find_dependency for AMDDeviceLibs, amd_comgr, and hsa-runtime64. Without forwarding these _DIR variables the isolated build environment cannot locate them, causing CMake configuration failure in TheRock CI. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 4839a21f359..68017db1a39 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -85,6 +85,9 @@ else() -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_SOURCE_DIR}\" -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_TSL-ROBIN-MAP=${tsl-robin-map_SOURCE_DIR}\" -C \"cmake.define.hip_DIR=${hip_DIR}\" + -C \"cmake.define.AMDDeviceLibs_DIR=${AMDDeviceLibs_DIR}\" + -C \"cmake.define.amd_comgr_DIR=${amd_comgr_DIR}\" + -C \"cmake.define.hsa-runtime64_DIR=${hsa-runtime64_DIR}\" RESULT_VARIABLE _result ) if(_result) From 5b119dbc58425a9615ca2d14ad0d1cd13d1b7745 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 13:40:18 -0400 Subject: [PATCH 27/97] Use VERSION parameter directly in nanobind and tsl-robin-map fetch functions Replace _determine_git_tag indirection with direct use of the VERSION parameter for the git tag. The version is already passed from the caller via hipdnn_add_dependency so the macro lookup is unnecessary. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/cmake/Dependencies.cmake | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/projects/hipdnn/cmake/Dependencies.cmake b/projects/hipdnn/cmake/Dependencies.cmake index e6f91752b2e..9fca31d7d9e 100644 --- a/projects/hipdnn/cmake/Dependencies.cmake +++ b/projects/hipdnn/cmake/Dependencies.cmake @@ -249,12 +249,10 @@ endfunction() # Fetches tsl-robin-map function(_fetch_tsl-robin-map VERSION HASH) - _determine_git_tag(v v1.4.1) - fetchcontent_declare( tsl-robin-map GIT_REPOSITORY https://github.com/Tessil/robin-map.git - GIT_TAG ${GIT_TAG} + GIT_TAG v${VERSION} DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) @@ -267,14 +265,12 @@ endfunction() # Fetches nanobind function(_fetch_nanobind VERSION HASH) - _determine_git_tag(v v2.12.0) - set(NB_USE_SUBMODULE_DEPS OFF) fetchcontent_declare( nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind.git - GIT_TAG ${GIT_TAG} + GIT_TAG v${VERSION} ) fetchcontent_makeavailable(nanobind) From 9c71b962f74aff34524df66369bbe2ce7430a961 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 15:26:54 -0400 Subject: [PATCH 28/97] Decouple Python wheel packaging from CMake build system Replace the scikit-build-core + pip subprocess approach with a standalone wheel assembly script, eliminating the need to forward CMake dependency variables (hip_DIR, AMDDeviceLibs_DIR, etc.) through pip config settings. The superbuild path now stages the compiled .so and calls assemble_wheel.py (stdlib only) to produce the wheel directly. The developer path uses hatchling with a custom build hook that either compiles via CMake or packages a pre-built .so via HIPDNN_PREBUILT_SO. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 60 +++--- projects/hipdnn/python/hatch_build.py | 69 +++++++ projects/hipdnn/python/pyproject.toml | 33 +--- .../hipdnn/python/scripts/assemble_wheel.py | 178 ++++++++++++++++++ 4 files changed, 282 insertions(+), 58 deletions(-) create mode 100644 projects/hipdnn/python/hatch_build.py create mode 100644 projects/hipdnn/python/scripts/assemble_wheel.py diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 68017db1a39..a522c7b603e 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -64,34 +64,32 @@ target_link_libraries(hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_back # Installation # ============================================================================ -# SKBUILD is set by scikit-build-core when building a wheel. -if(DEFINED SKBUILD) - # pip install . / pip wheel . → scikit-build-core packages the extension into a wheel - install(TARGETS hipdnn_frontend_python DESTINATION .) -else() - # cmake --install → calls pip wheel to build .whl, stages it in share/hipdnn/wheels/ - install(CODE " - set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") - file(MAKE_DIRECTORY \"\${_wheel_dir}\") - - message(STATUS \"Building hipdnn-frontend wheel\") - execute_process( - COMMAND \"${Python_EXECUTABLE}\" -m pip wheel --no-deps - -w \"\${_wheel_dir}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" - -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" - -C \"cmake.define.nanobind_DIR=${nanobind_DIR}\" - -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" - -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_SOURCE_DIR}\" - -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_TSL-ROBIN-MAP=${tsl-robin-map_SOURCE_DIR}\" - -C \"cmake.define.hip_DIR=${hip_DIR}\" - -C \"cmake.define.AMDDeviceLibs_DIR=${AMDDeviceLibs_DIR}\" - -C \"cmake.define.amd_comgr_DIR=${amd_comgr_DIR}\" - -C \"cmake.define.hsa-runtime64_DIR=${hsa-runtime64_DIR}\" - RESULT_VARIABLE _result - ) - if(_result) - message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") - endif() - ") -endif() +install(TARGETS hipdnn_frontend_python + LIBRARY DESTINATION lib/hipdnn_python_staging +) + +install(CODE " + set(_staging \"\${CMAKE_INSTALL_PREFIX}/lib/hipdnn_python_staging\") + set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") + file(MAKE_DIRECTORY \"\${_wheel_dir}\") + + file(GLOB _so_files \"\${_staging}/hipdnn_frontend_python*.so\") + list(LENGTH _so_files _so_count) + if(_so_count EQUAL 0) + message(FATAL_ERROR \"No hipdnn_frontend_python .so found in \${_staging}\") + endif() + list(GET _so_files 0 _so_path) + + message(STATUS \"Assembling hipdnn-frontend wheel from \${_so_path}\") + execute_process( + COMMAND \"${Python_EXECUTABLE}\" + \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/assemble_wheel.py\" + --so-path \"\${_so_path}\" + --package-dir \"${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend\" + --output-dir \"\${_wheel_dir}\" + RESULT_VARIABLE _result + ) + if(_result) + message(FATAL_ERROR \"Failed to assemble hipdnn-frontend wheel\") + endif() +") diff --git a/projects/hipdnn/python/hatch_build.py b/projects/hipdnn/python/hatch_build.py new file mode 100644 index 00000000000..a826f3ec539 --- /dev/null +++ b/projects/hipdnn/python/hatch_build.py @@ -0,0 +1,69 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Hatchling custom build hook: compiles the nanobind extension or copies a pre-built one.""" + +import glob +import os +import shutil +import subprocess +import sys +import tempfile + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + + +class CustomBuildHook(BuildHookInterface): + PLUGIN_NAME = "custom" + + def initialize(self, version, build_data): + package_dir = os.path.join(self.root, "hipdnn_frontend") + so_path = self._find_or_build_extension() + dest = os.path.join(package_dir, os.path.basename(so_path)) + shutil.copy2(so_path, dest) + build_data["shared_data"] = {"_extension_path": dest} + build_data["force_include"] = { + dest: f"hipdnn_frontend/{os.path.basename(so_path)}", + } + + def finalize(self, version, build_data, artifact_path): + if version == "editable": + return + path = (build_data.get("shared_data") or {}).get("_extension_path") + if path and os.path.isfile(path): + os.remove(path) + + def _find_or_build_extension(self): + prebuilt = os.environ.get("HIPDNN_PREBUILT_SO") + if prebuilt: + if not os.path.isfile(prebuilt): + raise RuntimeError(f"HIPDNN_PREBUILT_SO points to missing file: {prebuilt}") + return prebuilt + return self._compile_extension() + + def _compile_extension(self): + build_dir = os.path.join(self.root, "_hatch_build") + os.makedirs(build_dir, exist_ok=True) + + cmake_args = [ + "cmake", + "-S", self.root, + "-B", build_dir, + "-DCMAKE_BUILD_TYPE=Release", + ] + + prefix_path = os.environ.get("CMAKE_PREFIX_PATH") + if prefix_path: + cmake_args.append(f"-DCMAKE_PREFIX_PATH={prefix_path}") + + subprocess.check_call(cmake_args) + subprocess.check_call(["cmake", "--build", build_dir]) + + pattern = os.path.join(build_dir, "hipdnn_frontend_python*.so") + matches = glob.glob(pattern) + if not matches: + raise RuntimeError( + f"Build succeeded but no .so found matching {pattern}. " + f"Contents: {os.listdir(build_dir)}" + ) + return matches[0] diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 107df61ed8d..afa70cd0835 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,12 +2,8 @@ # SPDX-License-Identifier: MIT [build-system] -requires = [ - "scikit-build-core>=0.4.3", - "nanobind>=2.12.0", - "cmake>=3.18", -] -build-backend = "scikit_build_core.build" +requires = ["hatchling>=1.21.0"] +build-backend = "hatchling.build" [project] name = "hipdnn-frontend" @@ -42,28 +38,11 @@ dev = [ "numpy", ] -[tool.scikit-build] -# Specify the CMake source directory -cmake.source-dir = "." -# Build directory -build-dir = "build" -# Minimum CMake version -cmake.minimum-version = "3.18" -# Build type -cmake.build-type = "Release" -# Where the Python module will be installed -wheel.install-dir = "hipdnn_frontend" -# Package data -wheel.packages = ["hipdnn_frontend"] - -[tool.scikit-build.cmake.define] -CMAKE_PREFIX_PATH = "${CMAKE_PREFIX_PATH}" +[tool.hatch.build.targets.wheel] +packages = ["hipdnn_frontend"] -hip_DIR = "${hip_DIR}" -hipdnn_frontend_DIR = "${hipdnn_frontend_DIR}" -hipdnn_backend_DIR = "${hipdnn_backend_DIR}" -hipdnn_data_sdk_DIR = "${hipdnn_data_sdk_DIR}" +[tool.hatch.build.targets.wheel.hooks.custom] +path = "hatch_build.py" [tool.cibuildwheel] -# Skip building for certain Python versions or platforms if needed skip = ["pp*", "cp36-*", "cp37-*"] diff --git a/projects/hipdnn/python/scripts/assemble_wheel.py b/projects/hipdnn/python/scripts/assemble_wheel.py new file mode 100644 index 00000000000..046a3b448ae --- /dev/null +++ b/projects/hipdnn/python/scripts/assemble_wheel.py @@ -0,0 +1,178 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Assemble a Python wheel from pre-built artifacts (stdlib only).""" + +import argparse +import base64 +import csv +import hashlib +import io +import os +import re +import stat +import sys +import zipfile + +_PACKAGE_NAME = "hipdnn_frontend" +_DEFAULT_VERSION = "0.1.0" + + +def _parse_so_tags(so_filename): + """Extract Python, ABI, and platform tags from a .so filename. + + Example: hipdnn_frontend_python.cpython-312-x86_64-linux-gnu.so + -> ('cp312', 'cp312', 'linux_x86_64') + """ + m = re.search( + r"\.cpython-(\d+)([a-z]*)-(.+)\.so$", + so_filename, + ) + if not m: + sys.exit(f"Cannot parse tags from filename: {so_filename}") + + ver, flags, platform_raw = m.groups() + py_tag = f"cp{ver}" + abi_tag = f"cp{ver}{flags}" + platform_tag = _gnu_triplet_to_wheel_tag(platform_raw) + return py_tag, abi_tag, platform_tag + + +def _gnu_triplet_to_wheel_tag(triplet): + """Convert a GNU triplet (from .so filename) to a PEP 425 platform tag. + + x86_64-linux-gnu -> linux_x86_64 + aarch64-linux-gnu -> linux_aarch64 + """ + parts = triplet.split("-") + if len(parts) >= 2 and parts[1] == "linux": + return f"linux_{parts[0]}" + return triplet.replace("-", "_").replace(".", "_") + + +def _hash_record(data): + """Return 'sha256=,' for a RECORD entry.""" + digest = hashlib.sha256(data).digest() + b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") + return f"sha256={b64}", len(data) + + +def _collect_package_files(package_dir): + """Walk the pure-Python package directory and yield (arcname, filepath) pairs.""" + for dirpath, _, filenames in os.walk(package_dir): + for fname in sorted(filenames): + if fname.endswith((".pyc", "__pycache__")): + continue + filepath = os.path.join(dirpath, fname) + arcname = os.path.join( + _PACKAGE_NAME, + os.path.relpath(filepath, package_dir), + ) + yield arcname, filepath + + +def _build_metadata(version): + return ( + f"Metadata-Version: 2.1\n" + f"Name: {_PACKAGE_NAME.replace('_', '-')}\n" + f"Version: {version}\n" + f"Summary: Python bindings for the hipDNN frontend library\n" + f"Author: Advanced Micro Devices, Inc.\n" + f"License: MIT\n" + f"Requires-Python: >=3.8\n" + f"Requires-Dist: numpy>=1.19.0\n" + ) + + +def _build_wheel_metadata(py_tag, abi_tag, platform_tag): + return ( + f"Wheel-Version: 1.0\n" + f"Generator: assemble_wheel.py\n" + f"Root-Is-Purelib: false\n" + f"Tag: {py_tag}-{abi_tag}-{platform_tag}\n" + ) + + +def assemble(so_path, package_dir, output_dir, version): + so_filename = os.path.basename(so_path) + py_tag, abi_tag, platform_tag = _parse_so_tags(so_filename) + + dist_info = f"{_PACKAGE_NAME}-{version}.dist-info" + wheel_name = f"{_PACKAGE_NAME}-{version}-{py_tag}-{abi_tag}-{platform_tag}.whl" + wheel_path = os.path.join(output_dir, wheel_name) + + records = [] + + with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as whl: + # 1. Pure Python files + for arcname, filepath in _collect_package_files(package_dir): + data = open(filepath, "rb").read() + whl.writestr(arcname, data) + h, sz = _hash_record(data) + records.append((arcname, h, sz)) + + # 2. Compiled extension — use ZIP_STORED for the .so (no compression benefit) + so_arcname = f"{_PACKAGE_NAME}/{so_filename}" + so_data = open(so_path, "rb").read() + info = zipfile.ZipInfo(so_arcname) + info.compress_type = zipfile.ZIP_STORED + info.external_attr = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | + stat.S_IRGRP | stat.S_IXGRP | + stat.S_IROTH | stat.S_IXOTH) << 16 + whl.writestr(info, so_data) + h, sz = _hash_record(so_data) + records.append((so_arcname, h, sz)) + + # 3. dist-info/METADATA + metadata = _build_metadata(version).encode() + arcname = f"{dist_info}/METADATA" + whl.writestr(arcname, metadata) + h, sz = _hash_record(metadata) + records.append((arcname, h, sz)) + + # 4. dist-info/WHEEL + wheel_meta = _build_wheel_metadata(py_tag, abi_tag, platform_tag).encode() + arcname = f"{dist_info}/WHEEL" + whl.writestr(arcname, wheel_meta) + h, sz = _hash_record(wheel_meta) + records.append((arcname, h, sz)) + + # 5. dist-info/top_level.txt + top_level = f"{_PACKAGE_NAME}\n".encode() + arcname = f"{dist_info}/top_level.txt" + whl.writestr(arcname, top_level) + h, sz = _hash_record(top_level) + records.append((arcname, h, sz)) + + # 6. dist-info/RECORD (must be last — its own entry has no hash) + buf = io.StringIO() + writer = csv.writer(buf) + for row in records: + writer.writerow(row) + writer.writerow((f"{dist_info}/RECORD", "", "")) + record_data = buf.getvalue().encode() + whl.writestr(f"{dist_info}/RECORD", record_data) + + print(f"Wheel written: {wheel_path}") + return wheel_path + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--so-path", required=True, help="Path to compiled .so") + parser.add_argument("--package-dir", required=True, help="Path to pure-Python package dir") + parser.add_argument("--output-dir", required=True, help="Directory for output .whl") + parser.add_argument("--version", default=_DEFAULT_VERSION, help="Package version") + args = parser.parse_args() + + if not os.path.isfile(args.so_path): + sys.exit(f"Extension not found: {args.so_path}") + if not os.path.isdir(args.package_dir): + sys.exit(f"Package directory not found: {args.package_dir}") + os.makedirs(args.output_dir, exist_ok=True) + + assemble(args.so_path, args.package_dir, args.output_dir, args.version) + + +if __name__ == "__main__": + main() From fcef8827359025a5e5bd9ddb6b7ffc1cbc41b1c8 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 15:36:04 -0400 Subject: [PATCH 29/97] Apply black formatting to Python wheel scripts Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/hatch_build.py | 10 +++++++--- projects/hipdnn/python/scripts/assemble_wheel.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/projects/hipdnn/python/hatch_build.py b/projects/hipdnn/python/hatch_build.py index a826f3ec539..304d2e87dda 100644 --- a/projects/hipdnn/python/hatch_build.py +++ b/projects/hipdnn/python/hatch_build.py @@ -37,7 +37,9 @@ def _find_or_build_extension(self): prebuilt = os.environ.get("HIPDNN_PREBUILT_SO") if prebuilt: if not os.path.isfile(prebuilt): - raise RuntimeError(f"HIPDNN_PREBUILT_SO points to missing file: {prebuilt}") + raise RuntimeError( + f"HIPDNN_PREBUILT_SO points to missing file: {prebuilt}" + ) return prebuilt return self._compile_extension() @@ -47,8 +49,10 @@ def _compile_extension(self): cmake_args = [ "cmake", - "-S", self.root, - "-B", build_dir, + "-S", + self.root, + "-B", + build_dir, "-DCMAKE_BUILD_TYPE=Release", ] diff --git a/projects/hipdnn/python/scripts/assemble_wheel.py b/projects/hipdnn/python/scripts/assemble_wheel.py index 046a3b448ae..10f6b3d1569 100644 --- a/projects/hipdnn/python/scripts/assemble_wheel.py +++ b/projects/hipdnn/python/scripts/assemble_wheel.py @@ -116,9 +116,15 @@ def assemble(so_path, package_dir, output_dir, version): so_data = open(so_path, "rb").read() info = zipfile.ZipInfo(so_arcname) info.compress_type = zipfile.ZIP_STORED - info.external_attr = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | - stat.S_IRGRP | stat.S_IXGRP | - stat.S_IROTH | stat.S_IXOTH) << 16 + info.external_attr = ( + stat.S_IRUSR + | stat.S_IWUSR + | stat.S_IXUSR + | stat.S_IRGRP + | stat.S_IXGRP + | stat.S_IROTH + | stat.S_IXOTH + ) << 16 whl.writestr(info, so_data) h, sz = _hash_record(so_data) records.append((so_arcname, h, sz)) @@ -160,7 +166,9 @@ def assemble(so_path, package_dir, output_dir, version): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--so-path", required=True, help="Path to compiled .so") - parser.add_argument("--package-dir", required=True, help="Path to pure-Python package dir") + parser.add_argument( + "--package-dir", required=True, help="Path to pure-Python package dir" + ) parser.add_argument("--output-dir", required=True, help="Directory for output .whl") parser.add_argument("--version", default=_DEFAULT_VERSION, help="Package version") args = parser.parse_args() From a4738c9b957e6e1dc6df401c00274b0faf2c5843 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 15:51:54 -0400 Subject: [PATCH 30/97] Remove unused HIPDNN_PREBUILT_SO env var from hatch build hook Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/hatch_build.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/projects/hipdnn/python/hatch_build.py b/projects/hipdnn/python/hatch_build.py index 304d2e87dda..5ba53a1acb7 100644 --- a/projects/hipdnn/python/hatch_build.py +++ b/projects/hipdnn/python/hatch_build.py @@ -18,7 +18,7 @@ class CustomBuildHook(BuildHookInterface): def initialize(self, version, build_data): package_dir = os.path.join(self.root, "hipdnn_frontend") - so_path = self._find_or_build_extension() + so_path = self._compile_extension() dest = os.path.join(package_dir, os.path.basename(so_path)) shutil.copy2(so_path, dest) build_data["shared_data"] = {"_extension_path": dest} @@ -33,16 +33,6 @@ def finalize(self, version, build_data, artifact_path): if path and os.path.isfile(path): os.remove(path) - def _find_or_build_extension(self): - prebuilt = os.environ.get("HIPDNN_PREBUILT_SO") - if prebuilt: - if not os.path.isfile(prebuilt): - raise RuntimeError( - f"HIPDNN_PREBUILT_SO points to missing file: {prebuilt}" - ) - return prebuilt - return self._compile_extension() - def _compile_extension(self): build_dir = os.path.join(self.root, "_hatch_build") os.makedirs(build_dir, exist_ok=True) From bbed27af29d29683cae2edbb2e61c72b79e90c35 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 15:55:43 -0400 Subject: [PATCH 31/97] Remove pyproject.toml and hatch_build.py The only wheel-building path is the superbuild via assemble_wheel.py. The pip install path added complexity without a real use case since standalone builds require a prior hipdnn install anyway. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/hatch_build.py | 63 --------------------------- projects/hipdnn/python/pyproject.toml | 48 -------------------- 2 files changed, 111 deletions(-) delete mode 100644 projects/hipdnn/python/hatch_build.py delete mode 100644 projects/hipdnn/python/pyproject.toml diff --git a/projects/hipdnn/python/hatch_build.py b/projects/hipdnn/python/hatch_build.py deleted file mode 100644 index 5ba53a1acb7..00000000000 --- a/projects/hipdnn/python/hatch_build.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -"""Hatchling custom build hook: compiles the nanobind extension or copies a pre-built one.""" - -import glob -import os -import shutil -import subprocess -import sys -import tempfile - -from hatchling.builders.hooks.plugin.interface import BuildHookInterface - - -class CustomBuildHook(BuildHookInterface): - PLUGIN_NAME = "custom" - - def initialize(self, version, build_data): - package_dir = os.path.join(self.root, "hipdnn_frontend") - so_path = self._compile_extension() - dest = os.path.join(package_dir, os.path.basename(so_path)) - shutil.copy2(so_path, dest) - build_data["shared_data"] = {"_extension_path": dest} - build_data["force_include"] = { - dest: f"hipdnn_frontend/{os.path.basename(so_path)}", - } - - def finalize(self, version, build_data, artifact_path): - if version == "editable": - return - path = (build_data.get("shared_data") or {}).get("_extension_path") - if path and os.path.isfile(path): - os.remove(path) - - def _compile_extension(self): - build_dir = os.path.join(self.root, "_hatch_build") - os.makedirs(build_dir, exist_ok=True) - - cmake_args = [ - "cmake", - "-S", - self.root, - "-B", - build_dir, - "-DCMAKE_BUILD_TYPE=Release", - ] - - prefix_path = os.environ.get("CMAKE_PREFIX_PATH") - if prefix_path: - cmake_args.append(f"-DCMAKE_PREFIX_PATH={prefix_path}") - - subprocess.check_call(cmake_args) - subprocess.check_call(["cmake", "--build", build_dir]) - - pattern = os.path.join(build_dir, "hipdnn_frontend_python*.so") - matches = glob.glob(pattern) - if not matches: - raise RuntimeError( - f"Build succeeded but no .so found matching {pattern}. " - f"Contents: {os.listdir(build_dir)}" - ) - return matches[0] diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml deleted file mode 100644 index afa70cd0835..00000000000 --- a/projects/hipdnn/python/pyproject.toml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -[build-system] -requires = ["hatchling>=1.21.0"] -build-backend = "hatchling.build" - -[project] -name = "hipdnn-frontend" -version = "0.1.0" -description = "Python bindings for the hipDNN frontend library" -readme = "README.md" -requires-python = ">=3.8" -license = {text = "MIT"} -authors = [ - {name = "Advanced Micro Devices, Inc."}, -] -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Topic :: Scientific/Engineering", - "Topic :: Software Development :: Libraries", -] -dependencies = [ - "numpy>=1.19.0", -] - -[project.optional-dependencies] -dev = [ - "pytest>=6.0", - "numpy", -] - -[tool.hatch.build.targets.wheel] -packages = ["hipdnn_frontend"] - -[tool.hatch.build.targets.wheel.hooks.custom] -path = "hatch_build.py" - -[tool.cibuildwheel] -skip = ["pp*", "cp36-*", "cp37-*"] From aac5a05a7fedde8bbf32420cad41bf619dd1c8c1 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 16:52:33 -0400 Subject: [PATCH 32/97] Only include __init__.py and .so in Python wheel Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/scripts/assemble_wheel.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/projects/hipdnn/python/scripts/assemble_wheel.py b/projects/hipdnn/python/scripts/assemble_wheel.py index 10f6b3d1569..e39f7767075 100644 --- a/projects/hipdnn/python/scripts/assemble_wheel.py +++ b/projects/hipdnn/python/scripts/assemble_wheel.py @@ -58,17 +58,10 @@ def _hash_record(data): def _collect_package_files(package_dir): - """Walk the pure-Python package directory and yield (arcname, filepath) pairs.""" - for dirpath, _, filenames in os.walk(package_dir): - for fname in sorted(filenames): - if fname.endswith((".pyc", "__pycache__")): - continue - filepath = os.path.join(dirpath, fname) - arcname = os.path.join( - _PACKAGE_NAME, - os.path.relpath(filepath, package_dir), - ) - yield arcname, filepath + """Yield (arcname, filepath) for __init__.py only.""" + init_path = os.path.join(package_dir, "__init__.py") + if os.path.isfile(init_path): + yield f"{_PACKAGE_NAME}/__init__.py", init_path def _build_metadata(version): From fb906e129e1607f18b55f2595abdfc2b3244caeb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 17:00:49 -0400 Subject: [PATCH 33/97] Clean up wheel assembly script and staging directory Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 2 ++ .../hipdnn/python/scripts/assemble_wheel.py | 22 ++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index a522c7b603e..ad00e10f109 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -92,4 +92,6 @@ install(CODE " if(_result) message(FATAL_ERROR \"Failed to assemble hipdnn-frontend wheel\") endif() + + file(REMOVE_RECURSE \"\${_staging}\") ") diff --git a/projects/hipdnn/python/scripts/assemble_wheel.py b/projects/hipdnn/python/scripts/assemble_wheel.py index e39f7767075..09977f53218 100644 --- a/projects/hipdnn/python/scripts/assemble_wheel.py +++ b/projects/hipdnn/python/scripts/assemble_wheel.py @@ -16,6 +16,7 @@ _PACKAGE_NAME = "hipdnn_frontend" _DEFAULT_VERSION = "0.1.0" +_REQUIRES_DIST = ["numpy>=1.19.0"] def _parse_so_tags(so_filename): @@ -57,11 +58,12 @@ def _hash_record(data): return f"sha256={b64}", len(data) -def _collect_package_files(package_dir): - """Yield (arcname, filepath) for __init__.py only.""" +def _get_init_file(package_dir): + """Return (arcname, filepath) for __init__.py, or None if missing.""" init_path = os.path.join(package_dir, "__init__.py") if os.path.isfile(init_path): - yield f"{_PACKAGE_NAME}/__init__.py", init_path + return f"{_PACKAGE_NAME}/__init__.py", init_path + return None def _build_metadata(version): @@ -73,7 +75,7 @@ def _build_metadata(version): f"Author: Advanced Micro Devices, Inc.\n" f"License: MIT\n" f"Requires-Python: >=3.8\n" - f"Requires-Dist: numpy>=1.19.0\n" + + "".join(f"Requires-Dist: {dep}\n" for dep in _REQUIRES_DIST) ) @@ -97,16 +99,20 @@ def assemble(so_path, package_dir, output_dir, version): records = [] with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as whl: - # 1. Pure Python files - for arcname, filepath in _collect_package_files(package_dir): - data = open(filepath, "rb").read() + # 1. __init__.py + init_entry = _get_init_file(package_dir) + if init_entry: + arcname, filepath = init_entry + with open(filepath, "rb") as f: + data = f.read() whl.writestr(arcname, data) h, sz = _hash_record(data) records.append((arcname, h, sz)) # 2. Compiled extension — use ZIP_STORED for the .so (no compression benefit) so_arcname = f"{_PACKAGE_NAME}/{so_filename}" - so_data = open(so_path, "rb").read() + with open(so_path, "rb") as f: + so_data = f.read() info = zipfile.ZipInfo(so_arcname) info.compress_type = zipfile.ZIP_STORED info.external_attr = ( From 1cb33009dbe27dc868be3746b6d99f9c0968f294 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 15 May 2026 18:43:09 -0400 Subject: [PATCH 34/97] goes with hatch build --- projects/hipdnn/.gitignore | 2 + projects/hipdnn/python/CMakeLists.txt | 95 +++++++++----------- projects/hipdnn/python/README.md | 2 +- projects/hipdnn/python/_hatch_build.py | 30 +++++++ projects/hipdnn/python/build_python_wheel.py | 78 ++++++++++++++++ projects/hipdnn/python/pyproject.toml | 33 ++----- 6 files changed, 160 insertions(+), 80 deletions(-) create mode 100644 projects/hipdnn/python/_hatch_build.py create mode 100755 projects/hipdnn/python/build_python_wheel.py diff --git a/projects/hipdnn/.gitignore b/projects/hipdnn/.gitignore index 13d7a28d5c6..72ed23cbe77 100644 --- a/projects/hipdnn/.gitignore +++ b/projects/hipdnn/.gitignore @@ -77,3 +77,5 @@ __pycache__/ *.egg-info/ *.profraw + +_hatch_build diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 68017db1a39..6afc534b82c 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -1,16 +1,15 @@ # Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT cmake_minimum_required(VERSION 3.18) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -project(hipdnn_python_bindings) +project(hipdnn_python_bindings LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# Find Python and nanobind find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) if(NOT COMMAND hipdnn_add_dependency) @@ -20,24 +19,21 @@ endif() hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) -# Find hipDNN frontend headers set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include" CACHE PATH "Path to hipDNN frontend includes" ) -# Find HIP (if needed) find_package(hip REQUIRED) -# When built as a subdirectory of hipdnn, these targets already exist. -# When built standalone (pip wheel / pip install), find the installed libraries. +# When built as a subdirectory of hipDNN, these targets already exist. +# When built standalone, find them from the staged/install prefix. if(NOT TARGET hipdnn_frontend) find_package(hipdnn_frontend CONFIG REQUIRED) endif() if(NOT TARGET hipdnn_backend) - find_package(hipdnn_backend REQUIRED) + find_package(hipdnn_backend CONFIG REQUIRED) endif() -# Create the Python module nanobind_add_module( hipdnn_frontend_python src/module.cpp @@ -49,49 +45,44 @@ nanobind_add_module( src/memory_bindings.cpp ) -# Include directories target_include_directories( - hipdnn_frontend_python PRIVATE ${HIPDNN_FRONTEND_INCLUDE_DIR} ${HIP_INCLUDE_DIRS} + hipdnn_frontend_python + PRIVATE + ${HIPDNN_FRONTEND_INCLUDE_DIR} + ${HIP_INCLUDE_DIRS} ) -set_target_properties(hipdnn_frontend_python PROPERTIES INSTALL_RPATH_USE_LINK_PATH TRUE) - -# Link against hipDNN frontend library if it exists as a separate library Adjust this based on your -# actual library structure -target_link_libraries(hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_backend hip::host) - -# ============================================================================ -# Installation -# ============================================================================ - -# SKBUILD is set by scikit-build-core when building a wheel. -if(DEFINED SKBUILD) - # pip install . / pip wheel . → scikit-build-core packages the extension into a wheel - install(TARGETS hipdnn_frontend_python DESTINATION .) -else() - # cmake --install → calls pip wheel to build .whl, stages it in share/hipdnn/wheels/ - install(CODE " - set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") - file(MAKE_DIRECTORY \"\${_wheel_dir}\") - - message(STATUS \"Building hipdnn-frontend wheel\") - execute_process( - COMMAND \"${Python_EXECUTABLE}\" -m pip wheel --no-deps - -w \"\${_wheel_dir}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" - -C \"cmake.define.CMAKE_PREFIX_PATH=\${CMAKE_INSTALL_PREFIX}\" - -C \"cmake.define.nanobind_DIR=${nanobind_DIR}\" - -C \"cmake.define.tsl-robin-map_DIR=${tsl-robin-map_DIR}\" - -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_NANOBIND=${nanobind_SOURCE_DIR}\" - -C \"cmake.define.FETCHCONTENT_SOURCE_DIR_TSL-ROBIN-MAP=${tsl-robin-map_SOURCE_DIR}\" - -C \"cmake.define.hip_DIR=${hip_DIR}\" - -C \"cmake.define.AMDDeviceLibs_DIR=${AMDDeviceLibs_DIR}\" - -C \"cmake.define.amd_comgr_DIR=${amd_comgr_DIR}\" - -C \"cmake.define.hsa-runtime64_DIR=${hsa-runtime64_DIR}\" - RESULT_VARIABLE _result - ) - if(_result) - message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") - endif() - ") -endif() +target_link_libraries( + hipdnn_frontend_python + PRIVATE + hipdnn_frontend + hipdnn_backend + hip::host +) + +set_target_properties( + hipdnn_frontend_python + PROPERTIES + INSTALL_RPATH_USE_LINK_PATH TRUE +) + +# Stage the extension where the wheel build hook can find it. +install(TARGETS hipdnn_frontend_python DESTINATION lib/hipdnn/python) + +install(CODE " + set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") + file(MAKE_DIRECTORY \"\${_wheel_dir}\") + + message(STATUS \"Building hipdnn-frontend wheel\") + execute_process( + COMMAND \"${Python_EXECUTABLE}\" + \"${CMAKE_CURRENT_SOURCE_DIR}/build_python_wheel.py\" + --source-dir \"${CMAKE_CURRENT_SOURCE_DIR}\" + --ext-dir \"\${CMAKE_INSTALL_PREFIX}/lib/hipdnn/python\" + --wheel-dir \"\${_wheel_dir}\" + RESULT_VARIABLE _result + ) + if(_result) + message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") + endif() +") diff --git a/projects/hipdnn/python/README.md b/projects/hipdnn/python/README.md index be168dd4863..c9664f563c0 100644 --- a/projects/hipdnn/python/README.md +++ b/projects/hipdnn/python/README.md @@ -61,7 +61,7 @@ pip install --upgrade pip ### 2. Building and Installing the Python Bindings -The Python bindings use scikit-build to handle the CMake build process automatically through pip: +The Python bindings use hatchling to handle the build process automatically through pip: ```bash # Navigate to the hipdnn python directory diff --git a/projects/hipdnn/python/_hatch_build.py b/projects/hipdnn/python/_hatch_build.py new file mode 100644 index 00000000000..24f646a9ba8 --- /dev/null +++ b/projects/hipdnn/python/_hatch_build.py @@ -0,0 +1,30 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import glob +import os +from pathlib import Path + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + + +class CustomBuildHook(BuildHookInterface): + """Copy a pre-built nanobind extension into the wheel.""" + + def initialize(self, version: str, build_data: dict) -> None: + ext_dir = os.environ.get("HIPDNN_EXT_DIR", "") + if not ext_dir: + return + + ext_path = Path(ext_dir) + extensions = glob.glob(str(ext_path / "hipdnn_frontend_python*")) + if not extensions: + raise RuntimeError( + f"No hipdnn_frontend_python extension found in {ext_path}" + ) + + for ext in extensions: + name = Path(ext).name + build_data["force_include"][ext] = f"hipdnn_frontend/{name}" diff --git a/projects/hipdnn/python/build_python_wheel.py b/projects/hipdnn/python/build_python_wheel.py new file mode 100755 index 00000000000..3cb0ea6e060 --- /dev/null +++ b/projects/hipdnn/python/build_python_wheel.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +import argparse +import os +from pathlib import Path +import shutil +import subprocess +import sys + + +def main() -> int: + parser = argparse.ArgumentParser(description="Build the hipDNN frontend Python wheel.") + parser.add_argument( + "--source-dir", + type=Path, + required=True, + help="Path to projects/hipdnn/python", + ) + parser.add_argument( + "--ext-dir", + type=Path, + required=True, + help="Directory containing the pre-built hipdnn_frontend_python extension", + ) + parser.add_argument( + "--wheel-dir", + type=Path, + required=True, + help="Directory where the built wheel will be written", + ) + args = parser.parse_args() + + source_dir = args.source_dir.resolve() + ext_dir = args.ext_dir.resolve() + wheel_dir = args.wheel_dir.resolve() + + if not (source_dir / "pyproject.toml").exists(): + raise RuntimeError(f"Missing pyproject.toml in {source_dir}") + + import glob + + if not glob.glob(str(ext_dir / "hipdnn_frontend_python*")): + raise RuntimeError(f"No hipdnn_frontend_python extension found in {ext_dir}") + + shutil.rmtree(wheel_dir, ignore_errors=True) + wheel_dir.mkdir(parents=True, exist_ok=True) + + env = os.environ.copy() + env["HIPDNN_EXT_DIR"] = str(ext_dir) + + cmd = [ + sys.executable, + "-m", + "build", + "--wheel", + "--no-isolation", + "--outdir", + str(wheel_dir), + str(source_dir), + ] + + print("::: Building hipdnn-frontend wheel") + print(f"::: HIPDNN_EXT_DIR={ext_dir}") + print("::: " + " ".join(cmd)) + subprocess.check_call(cmd, cwd=source_dir, env=env) + + wheels = sorted(wheel_dir.glob("hipdnn_frontend-*.whl")) + if not wheels: + raise RuntimeError(f"No hipdnn_frontend wheel produced in {wheel_dir}") + + print(f"::: Built wheel: {wheels[-1]}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 107df61ed8d..cbc83e647e9 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,12 +2,8 @@ # SPDX-License-Identifier: MIT [build-system] -requires = [ - "scikit-build-core>=0.4.3", - "nanobind>=2.12.0", - "cmake>=3.18", -] -build-backend = "scikit_build_core.build" +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "hipdnn-frontend" @@ -42,28 +38,11 @@ dev = [ "numpy", ] -[tool.scikit-build] -# Specify the CMake source directory -cmake.source-dir = "." -# Build directory -build-dir = "build" -# Minimum CMake version -cmake.minimum-version = "3.18" -# Build type -cmake.build-type = "Release" -# Where the Python module will be installed -wheel.install-dir = "hipdnn_frontend" -# Package data -wheel.packages = ["hipdnn_frontend"] - -[tool.scikit-build.cmake.define] -CMAKE_PREFIX_PATH = "${CMAKE_PREFIX_PATH}" +[tool.hatch.build] +packages = ["hipdnn_frontend"] -hip_DIR = "${hip_DIR}" -hipdnn_frontend_DIR = "${hipdnn_frontend_DIR}" -hipdnn_backend_DIR = "${hipdnn_backend_DIR}" -hipdnn_data_sdk_DIR = "${hipdnn_data_sdk_DIR}" +[tool.hatch.build.hooks.custom] +path = "_hatch_build.py" [tool.cibuildwheel] -# Skip building for certain Python versions or platforms if needed skip = ["pp*", "cp36-*", "cp37-*"] From 3719e2a88eab9dc7533dcb318cf7ebb57eac9fac Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 10:59:02 -0400 Subject: [PATCH 35/97] Install hatchling in CI and fix black formatting Add hatchling to the pip install step in both Linux and Windows TheRock CI workflows so the wheel build backend is available for --no-isolation builds. Fix black formatting in build_python_wheel.py. Co-Authored-By: Claude Opus 4 --- .github/workflows/therock-ci-linux.yml | 1 + .github/workflows/therock-ci-windows.yml | 1 + projects/hipdnn/python/build_python_wheel.py | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 9cee5727b5a..944b707e8d5 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -48,6 +48,7 @@ jobs: - name: Install python deps run: | pip install -r TheRock/requirements.txt + pip install hatchling - name: Patch rocm-libraries run: | diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 012025cf14a..cc6a4fa39a6 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -57,6 +57,7 @@ jobs: - name: Install python deps run: | pip install -r TheRock/requirements.txt + pip install hatchling - name: Patch rocm-libraries run: | diff --git a/projects/hipdnn/python/build_python_wheel.py b/projects/hipdnn/python/build_python_wheel.py index 3cb0ea6e060..fe5e03797ca 100755 --- a/projects/hipdnn/python/build_python_wheel.py +++ b/projects/hipdnn/python/build_python_wheel.py @@ -11,7 +11,9 @@ def main() -> int: - parser = argparse.ArgumentParser(description="Build the hipDNN frontend Python wheel.") + parser = argparse.ArgumentParser( + description="Build the hipDNN frontend Python wheel." + ) parser.add_argument( "--source-dir", type=Path, From 494766ea3998e7a46c510fa244881bb6bcfd2a67 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 12:06:58 -0400 Subject: [PATCH 36/97] Replace hatchling with setuptools for hipdnn-frontend wheel build Removes hatchling as a build dependency in favor of setuptools, which is bundled with pip and requires no additional installation. Co-Authored-By: Claude Opus 4 --- .github/workflows/therock-ci-linux.yml | 1 - .github/workflows/therock-ci-windows.yml | 1 - projects/hipdnn/.gitignore | 2 +- projects/hipdnn/python/README.md | 2 +- projects/hipdnn/python/pyproject.toml | 11 ++++------ .../python/{_hatch_build.py => setup.py} | 21 +++++++++++++------ 6 files changed, 21 insertions(+), 17 deletions(-) rename projects/hipdnn/python/{_hatch_build.py => setup.py} (54%) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 944b707e8d5..9cee5727b5a 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -48,7 +48,6 @@ jobs: - name: Install python deps run: | pip install -r TheRock/requirements.txt - pip install hatchling - name: Patch rocm-libraries run: | diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index cc6a4fa39a6..012025cf14a 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -57,7 +57,6 @@ jobs: - name: Install python deps run: | pip install -r TheRock/requirements.txt - pip install hatchling - name: Patch rocm-libraries run: | diff --git a/projects/hipdnn/.gitignore b/projects/hipdnn/.gitignore index 72ed23cbe77..152f3a939de 100644 --- a/projects/hipdnn/.gitignore +++ b/projects/hipdnn/.gitignore @@ -78,4 +78,4 @@ __pycache__/ *.profraw -_hatch_build + diff --git a/projects/hipdnn/python/README.md b/projects/hipdnn/python/README.md index c9664f563c0..ef17bdacfbf 100644 --- a/projects/hipdnn/python/README.md +++ b/projects/hipdnn/python/README.md @@ -61,7 +61,7 @@ pip install --upgrade pip ### 2. Building and Installing the Python Bindings -The Python bindings use hatchling to handle the build process automatically through pip: +The Python bindings use setuptools to handle the build process automatically through pip: ```bash # Navigate to the hipdnn python directory diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index cbc83e647e9..c8467d9b60c 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,8 +2,8 @@ # SPDX-License-Identifier: MIT [build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +requires = ["setuptools"] +build-backend = "setuptools.build_meta" [project] name = "hipdnn-frontend" @@ -38,11 +38,8 @@ dev = [ "numpy", ] -[tool.hatch.build] -packages = ["hipdnn_frontend"] - -[tool.hatch.build.hooks.custom] -path = "_hatch_build.py" +[tool.setuptools.packages.find] +include = ["hipdnn_frontend*"] [tool.cibuildwheel] skip = ["pp*", "cp36-*", "cp37-*"] diff --git a/projects/hipdnn/python/_hatch_build.py b/projects/hipdnn/python/setup.py similarity index 54% rename from projects/hipdnn/python/_hatch_build.py rename to projects/hipdnn/python/setup.py index 24f646a9ba8..8acb4e22f9a 100644 --- a/projects/hipdnn/python/_hatch_build.py +++ b/projects/hipdnn/python/setup.py @@ -5,15 +5,19 @@ import glob import os +import shutil from pathlib import Path -from hatchling.builders.hooks.plugin.interface import BuildHookInterface +from setuptools import setup +from setuptools.command.build_py import build_py -class CustomBuildHook(BuildHookInterface): - """Copy a pre-built nanobind extension into the wheel.""" +class BuildPyWithExtension(build_py): + """Copy pre-built nanobind extension into the package before building.""" + + def run(self): + super().run() - def initialize(self, version: str, build_data: dict) -> None: ext_dir = os.environ.get("HIPDNN_EXT_DIR", "") if not ext_dir: return @@ -25,6 +29,11 @@ def initialize(self, version: str, build_data: dict) -> None: f"No hipdnn_frontend_python extension found in {ext_path}" ) + pkg_dir = Path(self.build_lib) / "hipdnn_frontend" + pkg_dir.mkdir(parents=True, exist_ok=True) + for ext in extensions: - name = Path(ext).name - build_data["force_include"][ext] = f"hipdnn_frontend/{name}" + shutil.copy2(ext, pkg_dir / Path(ext).name) + + +setup(cmdclass={"build_py": BuildPyWithExtension}) From 43c8f00857eb0ceba18efbc44f01003c327db9dd Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 12:27:02 -0400 Subject: [PATCH 37/97] Fix trailing newline in .gitignore for pre-commit Co-Authored-By: Claude Opus 4 --- projects/hipdnn/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/hipdnn/.gitignore b/projects/hipdnn/.gitignore index 152f3a939de..ca4baeea9ef 100644 --- a/projects/hipdnn/.gitignore +++ b/projects/hipdnn/.gitignore @@ -78,4 +78,3 @@ __pycache__/ *.profraw - From 8761fdd39bd9bc22d49c8631140e73eaa81b915c Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 12:40:02 -0400 Subject: [PATCH 38/97] Error when HIPDNN_EXT_DIR is not set instead of building broken wheel Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/projects/hipdnn/python/setup.py b/projects/hipdnn/python/setup.py index 8acb4e22f9a..73d6bf9969b 100644 --- a/projects/hipdnn/python/setup.py +++ b/projects/hipdnn/python/setup.py @@ -20,7 +20,11 @@ def run(self): ext_dir = os.environ.get("HIPDNN_EXT_DIR", "") if not ext_dir: - return + raise RuntimeError( + "HIPDNN_EXT_DIR environment variable is not set. " + "It must point to the directory containing the pre-built " + "hipdnn_frontend_python extension." + ) ext_path = Path(ext_dir) extensions = glob.glob(str(ext_path / "hipdnn_frontend_python*")) From cc3e3f72d7119540459d73670238c010027a71c0 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 12:41:23 -0400 Subject: [PATCH 39/97] Fix .gitignore trailing newline (take 2) Co-Authored-By: Claude Opus 4 --- projects/hipdnn/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/hipdnn/.gitignore b/projects/hipdnn/.gitignore index ca4baeea9ef..13d7a28d5c6 100644 --- a/projects/hipdnn/.gitignore +++ b/projects/hipdnn/.gitignore @@ -77,4 +77,3 @@ __pycache__/ *.egg-info/ *.profraw - From 99522934267e4f02a3d2287a3834b735996e53f4 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 12:59:43 -0400 Subject: [PATCH 40/97] Remove standalone build guards from python CMakeLists.txt Python bindings are always built as a subdirectory of hipDNN, so the fallback includes and find_package calls for standalone builds are unnecessary. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 6afc534b82c..1a1f7221e43 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -12,10 +12,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) -if(NOT COMMAND hipdnn_add_dependency) - include("${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Dependencies.cmake") -endif() - hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) @@ -25,15 +21,6 @@ set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include find_package(hip REQUIRED) -# When built as a subdirectory of hipDNN, these targets already exist. -# When built standalone, find them from the staged/install prefix. -if(NOT TARGET hipdnn_frontend) - find_package(hipdnn_frontend CONFIG REQUIRED) -endif() -if(NOT TARGET hipdnn_backend) - find_package(hipdnn_backend CONFIG REQUIRED) -endif() - nanobind_add_module( hipdnn_frontend_python src/module.cpp From b2bca9832ba177afc6195a8571f3d9e39432c323 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 13:04:45 -0400 Subject: [PATCH 41/97] Remove redundant find_package(hip) from python CMakeLists.txt Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 1a1f7221e43..1ecf599c865 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -19,8 +19,6 @@ set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include CACHE PATH "Path to hipDNN frontend includes" ) -find_package(hip REQUIRED) - nanobind_add_module( hipdnn_frontend_python src/module.cpp From cc84fed28eaa0d4e1c41c3a3f95a6fe1c2761a4c Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 13:09:40 -0400 Subject: [PATCH 42/97] Remove redundant include dirs and hip::host from python CMakeLists.txt hipdnn_frontend and hipdnn_backend export their include directories and transitively provide hip::host, so explicit references are unnecessary. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 1ecf599c865..7344b3f7520 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -15,10 +15,6 @@ find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) -set(HIPDNN_FRONTEND_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../frontend/include" - CACHE PATH "Path to hipDNN frontend includes" -) - nanobind_add_module( hipdnn_frontend_python src/module.cpp @@ -30,19 +26,11 @@ nanobind_add_module( src/memory_bindings.cpp ) -target_include_directories( - hipdnn_frontend_python - PRIVATE - ${HIPDNN_FRONTEND_INCLUDE_DIR} - ${HIP_INCLUDE_DIRS} -) - target_link_libraries( hipdnn_frontend_python PRIVATE hipdnn_frontend hipdnn_backend - hip::host ) set_target_properties( From 2c4aada6acef98d2f6077f7285c5eabc794551c5 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 13:57:16 -0400 Subject: [PATCH 43/97] Exclude samples and tests from hipdnn-frontend wheel Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index c8467d9b60c..e43c2164cb9 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -39,7 +39,7 @@ dev = [ ] [tool.setuptools.packages.find] -include = ["hipdnn_frontend*"] +include = ["hipdnn_frontend"] [tool.cibuildwheel] skip = ["pp*", "cp36-*", "cp37-*"] From 8598e96451a84bbf4a27907fdb9fe8a0dcf8acdf Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 17:54:23 -0400 Subject: [PATCH 44/97] Fix Python binding tests for updated API and pytest import rules Move helper functions (build_conv_fprop_graph, execute_graph) from conftest.py to helpers.py since pytest does not support importing from conftest as a regular module. Update tests for Tensor.create() API changes: UIDs are no longer auto-assigned, and Graph.tensor(attrs) was removed. Register custom pytest markers to suppress warnings. Co-Authored-By: Claude Opus 4 --- .../python/hipdnn_frontend/test/conftest.py | 81 ----------------- .../python/hipdnn_frontend/test/helpers.py | 88 +++++++++++++++++++ .../hipdnn_frontend/test/test_conv_dgrad.py | 2 +- .../hipdnn_frontend/test/test_conv_fprop.py | 2 +- .../hipdnn_frontend/test/test_conv_wgrad.py | 2 +- .../hipdnn_frontend/test/test_graph_api.py | 12 +-- .../hipdnn_frontend/test/test_tensor_api.py | 8 +- projects/hipdnn/python/pyproject.toml | 6 ++ 8 files changed, 107 insertions(+), 94 deletions(-) create mode 100644 projects/hipdnn/python/hipdnn_frontend/test/helpers.py diff --git a/projects/hipdnn/python/hipdnn_frontend/test/conftest.py b/projects/hipdnn/python/hipdnn_frontend/test/conftest.py index 9d46ca71e13..6acb1bc0037 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/conftest.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/conftest.py @@ -3,7 +3,6 @@ """Shared pytest fixtures for hipDNN Python binding tests.""" -import numpy as np import pytest import hipdnn_frontend as hipdnn @@ -23,83 +22,3 @@ def graph(): g.set_intermediate_data_type(hipdnn.DataType.FLOAT) g.set_compute_data_type(hipdnn.DataType.FLOAT) return g - - -def build_conv_fprop_graph( - graph, - n=16, - c=16, - h=16, - w=16, - k=16, - r=3, - s=3, - stride=1, - pad=1, - dilation=1, -): - """Build a complete convolution forward propagation graph. - - Returns: - Tuple of (graph, x_tensor, weight_tensor, y_tensor, out_h, out_w). - """ - out_h = (h + 2 * pad - dilation * (r - 1) - 1) // stride + 1 - out_w = (w + 2 * pad - dilation * (s - 1) - 1) // stride + 1 - - graph.set_name("conv_fprop_test") - - x = hipdnn.Tensor.create([n, c, h, w], hipdnn.DataType.FLOAT) - x.set_name("input_x") - - weight = hipdnn.Tensor.create([k, c, r, s], hipdnn.DataType.FLOAT) - weight.set_name("weight") - - conv_attrs = hipdnn.ConvFpropAttributes() - conv_attrs.set_name("conv_fprop_node") - conv_attrs.set_padding([pad, pad]) - conv_attrs.set_stride([stride, stride]) - conv_attrs.set_dilation([dilation, dilation]) - - y = graph.conv_fprop(x, weight, conv_attrs) - y.set_name("output_y") - y.set_output(True) - - return graph, x, weight, y, out_h, out_w - - -def execute_graph(graph, handle, tensor_uid_to_data): - """Execute a graph with the given tensor data. - - Args: - graph: A fully-built hipDNN graph (validated, built, plans created). - handle: A hipDNN handle. - tensor_uid_to_data: Dict mapping tensor UIDs to numpy arrays. - Output tensors should have zero-initialized arrays. - - Returns: - Dict mapping tensor UIDs to result numpy arrays (copied from device). - """ - buffers = {} - variant_pack = {} - for uid, data in tensor_uid_to_data.items(): - buf = hipdnn.DeviceBuffer(data.nbytes) - buf.copy_from_host(data.tobytes()) - buffers[uid] = (buf, data.shape, data.dtype) - variant_pack[uid] = buf.ptr() - - workspace_size = graph.get_workspace_size() - workspace_buffer = None - workspace_ptr = 0 - if workspace_size > 0: - workspace_buffer = hipdnn.DeviceBuffer(workspace_size) - workspace_ptr = workspace_buffer.ptr() - - exec_result = graph.execute(handle, variant_pack, workspace_ptr) - assert exec_result.is_good(), f"Graph execution failed: {exec_result.get_message()}" - - results = {} - for uid, (buf, shape, dtype) in buffers.items(): - host_bytes = buf.copy_to_host() - results[uid] = np.frombuffer(host_bytes, dtype=dtype).reshape(shape) - - return results diff --git a/projects/hipdnn/python/hipdnn_frontend/test/helpers.py b/projects/hipdnn/python/hipdnn_frontend/test/helpers.py new file mode 100644 index 00000000000..f35b34ed79e --- /dev/null +++ b/projects/hipdnn/python/hipdnn_frontend/test/helpers.py @@ -0,0 +1,88 @@ +# Copyright © Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +"""Shared helper functions for hipDNN Python binding tests.""" + +import numpy as np + +import hipdnn_frontend as hipdnn + + +def build_conv_fprop_graph( + graph, + n=16, + c=16, + h=16, + w=16, + k=16, + r=3, + s=3, + stride=1, + pad=1, + dilation=1, +): + """Build a complete convolution forward propagation graph. + + Returns: + Tuple of (graph, x_tensor, weight_tensor, y_tensor, out_h, out_w). + """ + out_h = (h + 2 * pad - dilation * (r - 1) - 1) // stride + 1 + out_w = (w + 2 * pad - dilation * (s - 1) - 1) // stride + 1 + + graph.set_name("conv_fprop_test") + + x = hipdnn.Tensor.create([n, c, h, w], hipdnn.DataType.FLOAT) + x.set_name("input_x") + + weight = hipdnn.Tensor.create([k, c, r, s], hipdnn.DataType.FLOAT) + weight.set_name("weight") + + conv_attrs = hipdnn.ConvFpropAttributes() + conv_attrs.set_name("conv_fprop_node") + conv_attrs.set_padding([pad, pad]) + conv_attrs.set_stride([stride, stride]) + conv_attrs.set_dilation([dilation, dilation]) + + y = graph.conv_fprop(x, weight, conv_attrs) + y.set_name("output_y") + y.set_output(True) + + return graph, x, weight, y, out_h, out_w + + +def execute_graph(graph, handle, tensor_uid_to_data): + """Execute a graph with the given tensor data. + + Args: + graph: A fully-built hipDNN graph (validated, built, plans created). + handle: A hipDNN handle. + tensor_uid_to_data: Dict mapping tensor UIDs to numpy arrays. + Output tensors should have zero-initialized arrays. + + Returns: + Dict mapping tensor UIDs to result numpy arrays (copied from device). + """ + buffers = {} + variant_pack = {} + for uid, data in tensor_uid_to_data.items(): + buf = hipdnn.DeviceBuffer(data.nbytes) + buf.copy_from_host(data.tobytes()) + buffers[uid] = (buf, data.shape, data.dtype) + variant_pack[uid] = buf.ptr() + + workspace_size = graph.get_workspace_size() + workspace_buffer = None + workspace_ptr = 0 + if workspace_size > 0: + workspace_buffer = hipdnn.DeviceBuffer(workspace_size) + workspace_ptr = workspace_buffer.ptr() + + exec_result = graph.execute(handle, variant_pack, workspace_ptr) + assert exec_result.is_good(), f"Graph execution failed: {exec_result.get_message()}" + + results = {} + for uid, (buf, shape, dtype) in buffers.items(): + host_bytes = buf.copy_to_host() + results[uid] = np.frombuffer(host_bytes, dtype=dtype).reshape(shape) + + return results diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py index d962064d0e5..76c5752fe07 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_dgrad.py @@ -8,7 +8,7 @@ import hipdnn_frontend as hipdnn -from conftest import execute_graph +from .helpers import execute_graph # Dimensions used across tests N, C, H, W = 16, 16, 16, 16 diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py index f3a111cbb5f..7a17020b04c 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py @@ -8,7 +8,7 @@ import hipdnn_frontend as hipdnn -from conftest import build_conv_fprop_graph, execute_graph +from .helpers import build_conv_fprop_graph, execute_graph # Dimensions used across tests N, C, H, W = 16, 16, 16, 16 diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py index 03ea6721401..85fbc556947 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_wgrad.py @@ -8,7 +8,7 @@ import hipdnn_frontend as hipdnn -from conftest import execute_graph +from .helpers import execute_graph # Dimensions used across tests N, C, H, W = 16, 16, 16, 16 diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py b/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py index a64f9f8704d..397da4a41e1 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_graph_api.py @@ -50,18 +50,14 @@ class TestGraphTensorCreation: """Tests for creating tensors via the Graph API.""" def test_graph_tensor_creation(self): - """graph.tensor() creates a shared tensor from attributes.""" - g = hipdnn.Graph() - - attrs = hipdnn.Tensor() - attrs.set_dim([2, 3, 4]) - attrs.set_data_type(hipdnn.DataType.FLOAT) - attrs.set_stride([12, 4, 1]) + """Tensor.create() produces a tensor with expected dims and dtype.""" + t = hipdnn.Tensor.create([2, 3, 4], hipdnn.DataType.FLOAT) + t.set_stride([12, 4, 1]) - t = g.tensor(attrs) assert t is not None assert t.get_dim() == [2, 3, 4] assert t.get_data_type() == hipdnn.DataType.FLOAT + assert t.get_stride() == [12, 4, 1] def test_graph_tensor_like(self): """Graph.tensor_like() creates a tensor with matching dims but new uid.""" diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py b/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py index 150e6e91c84..1342b296b85 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_tensor_api.py @@ -20,10 +20,14 @@ def test_tensor_create_sets_data_type(self): t = hipdnn.Tensor.create([1, 2, 3], hipdnn.DataType.FLOAT) assert t.get_data_type() == hipdnn.DataType.FLOAT - def test_tensor_uid_is_assigned(self): - """Each tensor receives a unique auto-assigned uid.""" + def test_tensor_uid_is_not_auto_assigned(self): + """Tensor.create() does not auto-assign a uid; manual set_uid works.""" t1 = hipdnn.Tensor.create([1, 2], hipdnn.DataType.FLOAT) + assert not t1.has_uid() + + t1.set_uid(1) t2 = hipdnn.Tensor.create([3, 4], hipdnn.DataType.FLOAT) + t2.set_uid(2) assert t1.get_uid() != t2.get_uid() diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index e43c2164cb9..c1af6fc184f 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -41,5 +41,11 @@ dev = [ [tool.setuptools.packages.find] include = ["hipdnn_frontend"] +[tool.pytest.ini_options] +markers = [ + "gpu: tests that require GPU hardware", + "integration: end-to-end integration tests", +] + [tool.cibuildwheel] skip = ["pp*", "cp36-*", "cp37-*"] From 338948cac124a21659983662d8a327efd7e998fa Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 17:54:33 -0400 Subject: [PATCH 45/97] Add GitHub Actions workflow for Python binding tests Run pytest suite on GPU-equipped runner with ROCm and miopen-provider built from source. Triggers on pushes to develop/release branches and PRs touching projects/hipdnn/. Co-Authored-By: Claude Opus 4 --- .github/workflows/hipdnn-python-tests.yml | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 .github/workflows/hipdnn-python-tests.yml diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml new file mode 100644 index 00000000000..806ace25d3a --- /dev/null +++ b/.github/workflows/hipdnn-python-tests.yml @@ -0,0 +1,72 @@ +name: hipdnn-python-tests + +on: + push: + branches: + - develop + - release/therock-* + paths: + - 'projects/hipdnn/**' + - '.github/workflows/hipdnn-python-tests.yml' + pull_request: + paths: + - 'projects/hipdnn/**' + - '.github/workflows/hipdnn-python-tests.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + python-tests: + runs-on: azure-linux-scale-rocm + steps: + - name: Checkout rocm-libraries + uses: actions/checkout@v6 + + - name: Checkout TheRock + uses: actions/checkout@v6 + with: + repository: ROCm/TheRock + path: TheRock + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + ninja-build \ + python3-venv \ + python3-dev + python3 -m venv .venv + . .venv/bin/activate + pip install boto3 cmake numpy pytest + pip install -r TheRock/requirements.txt + + - name: Install ROCm + run: | + sudo .venv/bin/python3 TheRock/build_tools/install_rocm_from_artifacts.py \ + --latest-release \ + --amdgpu-family gfx94X-dcgpu \ + --output-dir /opt/rocm \ + --base-only + + - name: Build hipDNN with Python bindings and miopen-provider + run: | + . .venv/bin/activate + export PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH + export CXX=/opt/rocm/llvm/bin/clang++ + cmake -B build -GNinja \ + -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ + -DROCM_PATH=/opt/rocm \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DHIP_PLATFORM=amd + ninja -C build hipdnn_frontend_python libmiopen_plugin.so + + - name: Run Python binding tests + run: | + . .venv/bin/activate + PYTHONPATH=projects/hipdnn/python:build/lib \ + LD_LIBRARY_PATH=build/lib:/opt/rocm/lib \ + HIPDNN_PLUGIN_PATH=build/lib/hipdnn_plugins/engines \ + pytest projects/hipdnn/python/hipdnn_frontend/test/ -v From ba71d4e560edb05d62ed20671c442b743f74fc87 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 17:57:31 -0400 Subject: [PATCH 46/97] Default Python bindings to OFF and enable explicitly in CI Co-Authored-By: Claude Opus 4 --- .github/workflows/hipdnn-python-tests.yml | 1 + projects/hipdnn/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 806ace25d3a..c1b5069f765 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -58,6 +58,7 @@ jobs: export CXX=/opt/rocm/llvm/bin/clang++ cmake -B build -GNinja \ -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ + -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index e0c85cba9cd..1f038291c51 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -155,7 +155,7 @@ if(DEFINED HIP_DNN_SKIP_TESTS) endif() option(HIPDNN_SKIP_TESTS "Skips building all tests" OFF) -option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" ON) +option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" OFF) option(HIPDNN_ENABLE_COVERAGE "Build with code coverage flags" OFF) option(BUILD_ADDRESS_SANITIZER "Build with Address Sanitizer enabled" OFF) From b528596af40ab14010cd5b9f0298579d61811ca7 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 18:05:57 -0400 Subject: [PATCH 47/97] Update TheRock hash and default python bindings to OFF Co-Authored-By: Claude Opus 4 --- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- projects/hipdnn/CMakeLists.txt | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 9cee5727b5a..410df07b340 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index feb05756136..b062a83cb98 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -35,7 +35,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 012025cf14a..4f61469b3e7 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -47,7 +47,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 1e854a87702..f3e25718303 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -59,7 +59,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index d3d41d4e8e2..ff2ced0c261 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index c24ec37262a..94d752ee239 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 70a6335b27d48f3781ba07eab89269f75a00f527 # 2026-05-03 commit + ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/projects/hipdnn/CMakeLists.txt b/projects/hipdnn/CMakeLists.txt index e0c85cba9cd..1f038291c51 100644 --- a/projects/hipdnn/CMakeLists.txt +++ b/projects/hipdnn/CMakeLists.txt @@ -155,7 +155,7 @@ if(DEFINED HIP_DNN_SKIP_TESTS) endif() option(HIPDNN_SKIP_TESTS "Skips building all tests" OFF) -option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" ON) +option(HIPDNN_BUILD_PYTHON_BINDINGS "Build Python bindings (requires Python and nanobind)" OFF) option(HIPDNN_ENABLE_COVERAGE "Build with code coverage flags" OFF) option(BUILD_ADDRESS_SANITIZER "Build with Address Sanitizer enabled" OFF) From 78659c374c3595f929b46de401940cd604a04138 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 18:10:34 -0400 Subject: [PATCH 48/97] Restore CI workflow files from develop Co-Authored-By: Claude Opus 4 --- .github/workflows/therock-ci-linux.yml | 25 +++++++++++++------- .github/workflows/therock-ci-nightly.yml | 15 +++++++++++- .github/workflows/therock-ci-windows.yml | 20 +++++++++------- .github/workflows/therock-ci.yml | 17 ++++++++++++- .github/workflows/therock-test-component.yml | 11 ++++----- .github/workflows/therock-test-packages.yml | 2 +- 6 files changed, 64 insertions(+), 26 deletions(-) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 410df07b340..ebef1ab19dc 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -3,6 +3,12 @@ name: TheRock CI Linux on: workflow_call: inputs: + therock_ref: + type: string + required: true + docker_image: + type: string + required: true cmake_options: type: string projects_to_test: @@ -13,6 +19,8 @@ on: type: string test_runs_on: type: string + build_runs_on: + type: string permissions: contents: read @@ -20,11 +28,11 @@ permissions: jobs: therock-build-linux: name: Build (${{ inputs.amdgpu_families }}) - runs-on: azure-linux-scale-rocm + runs-on: ${{ inputs.build_runs_on }} permissions: id-token: write container: - image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2 + image: ${{ inputs.docker_image }} options: -v /runner/config:/home/awsconfig/ strategy: fail-fast: true @@ -43,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: ${{ inputs.therock_ref }} - name: Install python deps run: | @@ -132,17 +140,16 @@ jobs: --build-dir TheRock/build \ --upload - - name: Notify MIOpen Teams Channel on Failure - # Channel name "MIOpen CI Reporting" - if: ${{ contains(inputs.projects_to_test, 'miopen') && failure() && !github.event.pull_request.head.repo.fork }} + - name: Notify Teams Channel on Failure + if: ${{ failure() && !github.event.pull_request.head.repo.fork }} run: | python3 .github/scripts/notify_teams.py \ - --project miopen \ --failure-stage build \ --log-path TheRock/build/logs \ - --webhook-url "${{ secrets.MIOPEN_CI_WEBHOOK_URL }}" \ + --webhook-urls '{"miopen":"${{ secrets.MIOPEN_CI_WEBHOOK_URL }}","hipdnn":"${{ secrets.HIPDNN_CI_WEBHOOK_URL }}"}' \ --pr-number "${{ github.event.pull_request.number }}" \ - --pr-title "${{ github.event.pull_request.title }}" + --pr-title "${{ github.event.pull_request.title }}" \ + --component-name "${{ inputs.projects_to_test }}" therock-test-linux: name: Test (${{ inputs.amdgpu_families }}) diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index b062a83cb98..247913493a4 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -17,8 +17,12 @@ jobs: name: "Setup" runs-on: ubuntu-24.04 outputs: + therock_ref: ${{ steps.ci-env.outputs.therock-ref }} + docker_image: ${{ steps.ci-env.outputs.docker-image }} linux_projects: ${{ steps.linux_projects.outputs.linux_projects }} + linux_build_runs_on: ${{ steps.linux_projects.outputs.build_runs_on }} windows_projects: ${{ steps.windows_projects.outputs.windows_projects }} + windows_build_runs_on: ${{ steps.windows_projects.outputs.build_runs_on }} test_type: ${{ steps.linux_projects.outputs.test_type }} linux_package_targets: ${{ steps.configure_linux.outputs.package_targets }} windows_package_targets: ${{ steps.configure_windows.outputs.package_targets }} @@ -30,12 +34,16 @@ jobs: sparse-checkout-cone-mode: true fetch-depth: 2 + - name: Load CI environment + id: ci-env + uses: ./.github/actions/ci-env + - name: Checkout TheRock repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" path: TheRock - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: ${{ steps.ci-env.outputs.therock-ref }} - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -96,11 +104,14 @@ jobs: uses: ./.github/workflows/therock-ci-linux.yml secrets: inherit with: + therock_ref: ${{ needs.setup.outputs.therock_ref }} + docker_image: ${{ needs.setup.outputs.docker_image }} cmake_options: ${{ matrix.projects.cmake_options }} projects_to_test: ${{ matrix.projects.projects_to_test }} test_type: ${{ needs.setup.outputs.test_type }} amdgpu_families: ${{ matrix.target_bundle.amdgpu_family }} test_runs_on: ${{ matrix.target_bundle.test_machine }} + build_runs_on: ${{ needs.setup.outputs.linux_build_runs_on }} therock-ci-windows: name: Windows (${{ matrix.projects.projects_to_test }} | ${{ matrix.target_bundle.amdgpu_family }}) @@ -117,11 +128,13 @@ jobs: uses: ./.github/workflows/therock-ci-windows.yml secrets: inherit with: + therock_ref: ${{ needs.setup.outputs.therock_ref }} cmake_options: ${{ matrix.projects.cmake_options }} projects_to_test: ${{ matrix.projects.projects_to_test }} test_type: ${{ needs.setup.outputs.test_type }} amdgpu_families: ${{ matrix.target_bundle.amdgpu_family }} test_runs_on: ${{ matrix.target_bundle.test_machine }} + build_runs_on: ${{ needs.setup.outputs.windows_build_runs_on }} therock_ci_nightly_summary: name: TheRock CI Nightly Summary diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 4f61469b3e7..fadf7f80865 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -3,6 +3,9 @@ name: TheRock CI Windows on: workflow_call: inputs: + therock_ref: + type: string + required: true cmake_options: type: string projects_to_test: @@ -13,6 +16,8 @@ on: type: string test_runs_on: type: string + build_runs_on: + type: string permissions: contents: read @@ -20,7 +25,7 @@ permissions: jobs: therock-build-windows: name: Build (${{ inputs.amdgpu_families }}) - runs-on: azure-windows-scale-rocm + runs-on: ${{ inputs.build_runs_on }} outputs: AMDGPU_FAMILIES: ${{ env.AMDGPU_FAMILIES }} permissions: @@ -47,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: ${{ inputs.therock_ref }} - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -154,17 +159,16 @@ jobs: --build-dir ${{ env.BUILD_DIR }} \ --upload - - name: Notify MIOpen Teams Channel on Failure - # Channel name "MIOpen CI Reporting" - if: ${{ contains(inputs.projects_to_test, 'miopen') && failure() && !github.event.pull_request.head.repo.fork }} + - name: Notify Teams Channel on Failure + if: ${{ failure() && !github.event.pull_request.head.repo.fork }} run: | python3 .github/scripts/notify_teams.py \ - --project miopen \ --failure-stage build \ --log-path "${{ env.BUILD_DIR }}/logs" \ - --webhook-url "${{ secrets.MIOPEN_CI_WEBHOOK_URL }}" \ + --webhook-urls '{"miopen":"${{ secrets.MIOPEN_CI_WEBHOOK_URL }}","hipdnn":"${{ secrets.HIPDNN_CI_WEBHOOK_URL }}"}' \ --pr-number "${{ github.event.pull_request.number }}" \ - --pr-title "${{ github.event.pull_request.title }}" + --pr-title "${{ github.event.pull_request.title }}" \ + --component-name "${{ inputs.projects_to_test }}" therock-test-windows: name: Test (${{ inputs.amdgpu_families }}) diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index f3e25718303..9e11c78bd56 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -41,8 +41,12 @@ jobs: name: "Setup" runs-on: ubuntu-24.04 outputs: + therock_ref: ${{ steps.ci-env.outputs.therock-ref }} + docker_image: ${{ steps.ci-env.outputs.docker-image }} linux_projects: ${{ steps.linux_projects.outputs.linux_projects }} + linux_build_runs_on: ${{ steps.linux_projects.outputs.build_runs_on }} windows_projects: ${{ steps.windows_projects.outputs.windows_projects }} + windows_build_runs_on: ${{ steps.windows_projects.outputs.build_runs_on }} test_type: ${{ steps.linux_projects.outputs.test_type }} linux_package_targets: ${{ steps.configure_linux.outputs.package_targets }} windows_package_targets: ${{ steps.configure_windows.outputs.package_targets }} @@ -54,12 +58,18 @@ jobs: sparse-checkout-cone-mode: true fetch-depth: 2 + # Loads shared CI constants (TheRock ref, Docker image, runner labels) + # from the ci-env composite action so they are defined in one place. + - name: Load CI environment + id: ci-env + uses: ./.github/actions/ci-env + - name: Checkout TheRock repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" path: TheRock - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: ${{ steps.ci-env.outputs.therock-ref }} - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -136,11 +146,14 @@ jobs: uses: ./.github/workflows/therock-ci-linux.yml secrets: inherit with: + therock_ref: ${{ needs.setup.outputs.therock_ref }} + docker_image: ${{ needs.setup.outputs.docker_image }} cmake_options: ${{ matrix.projects.cmake_options }} projects_to_test: ${{ matrix.projects.projects_to_test }} test_type: ${{ needs.setup.outputs.test_type }} amdgpu_families: ${{ matrix.target_bundle.amdgpu_family }} test_runs_on: ${{ matrix.target_bundle.test_machine }} + build_runs_on: ${{ needs.setup.outputs.linux_build_runs_on }} therock-ci-windows: name: Windows (${{ matrix.projects.projects_to_test }} | ${{ matrix.target_bundle.amdgpu_family }}) @@ -157,11 +170,13 @@ jobs: uses: ./.github/workflows/therock-ci-windows.yml secrets: inherit with: + therock_ref: ${{ needs.setup.outputs.therock_ref }} cmake_options: ${{ matrix.projects.cmake_options }} projects_to_test: ${{ matrix.projects.projects_to_test }} test_type: ${{ needs.setup.outputs.test_type }} amdgpu_families: ${{ matrix.target_bundle.amdgpu_family }} test_runs_on: ${{ matrix.target_bundle.test_machine }} + build_runs_on: ${{ needs.setup.outputs.windows_build_runs_on }} therock_ci_summary: name: TheRock CI Summary diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index ff2ced0c261..7f6f04a6a07 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: 4e469b56368b633fad7e77b559db6607da0028eb # 2026-05-13 commit - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} @@ -110,17 +110,16 @@ jobs: # Run test and capture output (still display to console) ${{ fromJSON(inputs.component).test_script }} 2>&1 | tee ./test_logs/test_output.log - - name: Notify MIOpen Teams Channel on Failure - # Channel name "MIOpen CI Reporting" - if: ${{ contains(fromJSON(inputs.component).job_name, 'miopen') && failure() && !github.event.pull_request.head.repo.fork }} + - name: Notify Teams Channel on Failure + if: ${{ failure() && !github.event.pull_request.head.repo.fork }} run: | python3 rocm-libraries/.github/scripts/notify_teams.py \ - --project miopen \ --failure-stage test \ --log-path ./test_logs/test_output.log \ - --webhook-url "${{ secrets.MIOPEN_CI_WEBHOOK_URL }}" \ + --webhook-urls '{"miopen":"${{ secrets.MIOPEN_CI_WEBHOOK_URL }}","hipdnn":"${{ secrets.HIPDNN_CI_WEBHOOK_URL }}"}' \ --pr-number "${{ github.event.pull_request.number }}" \ --pr-title "${{ github.event.pull_request.title }}" \ + --component-name "${{ fromJSON(inputs.component).job_name }}" \ --job-name "${{ fromJSON(inputs.component).job_name }} (shard ${{ matrix.shard }}/${{ fromJSON(inputs.component).total_shards }})" # GitHub's 'Complete job' step is unaware of launched executables diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 94d752ee239..edec0bb63dc 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 85b5d0e4ab9984246694d8c67dbffb40a0add50e + ref: 4e469b56368b633fad7e77b559db6607da0028eb # 2026-05-13 commit - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From 6dc9e9252029d12cf9196c350855d65c78fe4005 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 18:34:53 -0400 Subject: [PATCH 49/97] Read nanobind extension from build dir instead of install prefix The .so no longer needs to be installed to lib/hipdnn/python/ as a staging step. The wheel builder now reads it directly from the CMake build directory, so only the wheel ends up in the install tree. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 7344b3f7520..128540d7dbf 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -39,9 +39,6 @@ set_target_properties( INSTALL_RPATH_USE_LINK_PATH TRUE ) -# Stage the extension where the wheel build hook can find it. -install(TARGETS hipdnn_frontend_python DESTINATION lib/hipdnn/python) - install(CODE " set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") file(MAKE_DIRECTORY \"\${_wheel_dir}\") @@ -51,7 +48,7 @@ install(CODE " COMMAND \"${Python_EXECUTABLE}\" \"${CMAKE_CURRENT_SOURCE_DIR}/build_python_wheel.py\" --source-dir \"${CMAKE_CURRENT_SOURCE_DIR}\" - --ext-dir \"\${CMAKE_INSTALL_PREFIX}/lib/hipdnn/python\" + --ext-dir \"${CMAKE_CURRENT_BINARY_DIR}\" --wheel-dir \"\${_wheel_dir}\" RESULT_VARIABLE _result ) From c97e6e5958929ef51de9f3542be5f439408bdac9 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 18:37:33 -0400 Subject: [PATCH 50/97] Restore workflow files from develop Co-Authored-By: Claude Opus 4 --- .github/workflows/docs-pr-preview-cleanup.yml | 8 +- .github/workflows/hipdnn-superbuild-ci.yml | 259 ++++++++++++++++++ .github/workflows/rocisa-ci.yml | 22 +- .github/workflows/stinkytofu-ci.yml | 22 +- .../therock-multi-arch-ci-nightly.yml | 30 ++ .github/workflows/therock-multi-arch-ci.yml | 22 +- .github/workflows/update-docs.yml | 15 +- 7 files changed, 354 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/hipdnn-superbuild-ci.yml create mode 100644 .github/workflows/therock-multi-arch-ci-nightly.yml diff --git a/.github/workflows/docs-pr-preview-cleanup.yml b/.github/workflows/docs-pr-preview-cleanup.yml index cd174ca7447..7a3558dff7d 100644 --- a/.github/workflows/docs-pr-preview-cleanup.yml +++ b/.github/workflows/docs-pr-preview-cleanup.yml @@ -45,7 +45,13 @@ jobs: # Extract the string immediately following "projects/" (up to the next "/") from $comment project_alias=$(echo "$comment" | grep --only-matching --perl-regexp 'projects/\K[^/]+') - echo "project_slug=advanced-micro-devices-${project_alias}" >> $GITHUB_OUTPUT + + # Load the map and look up the rtd_slug by rtd_alias + project_slug=$(jq -r --arg alias "$project_alias" \ + '.projects[] | select(.rtd_alias == $alias) | .rtd_slug' \ + .github/docs-config.json) + + echo "project_slug=$project_slug" >> $GITHUB_OUTPUT - name: Deactivate RTD preview version if: steps.rtd.outputs.project_slug diff --git a/.github/workflows/hipdnn-superbuild-ci.yml b/.github/workflows/hipdnn-superbuild-ci.yml new file mode 100644 index 00000000000..ddf749df752 --- /dev/null +++ b/.github/workflows/hipdnn-superbuild-ci.yml @@ -0,0 +1,259 @@ +# Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT +# +# Lightweight CI for hipDNN + providers using the rocm-libraries superbuild +# and pre-built ROCm wheels (no TheRock source build required). + +name: hipDNN Superbuild CI + +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + paths: + - 'projects/hipdnn/**' + - 'dnn-providers/**' + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +env: + # Labels that trigger this workflow. Extend this list to gate on additional labels. + TRIGGER_LABELS: '["project: hipdnn", "project: miopen-provider", "project: hipblaslt-provider", "project: hip-kernel-provider", "project: hipdnn-integration-tests"]' + +jobs: + gate: + name: Check Labels + runs-on: ubuntu-24.04 + outputs: + should_run: ${{ steps.check.outputs.should_run }} + steps: + - name: Evaluate PR labels + id: check + env: + PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} + run: | + echo "PR labels: $PR_LABELS" + + # Check if any trigger label is present + TRIGGER='${{ env.TRIGGER_LABELS }}' + MATCH=$(echo "$PR_LABELS" | jq -e --argjson trigger "$TRIGGER" \ + '[.[] | select(. as $l | $trigger | index($l))] | length > 0') + + if [ "$MATCH" = "true" ]; then + echo "Trigger label found" + echo "should_run=true" >> "$GITHUB_OUTPUT" + else + echo "No trigger label found" + echo "should_run=false" >> "$GITHUB_OUTPUT" + fi + + hipdnn-windows-superbuild: + name: HipDNN Windows Superbuild + needs: gate + if: needs.gate.outputs.should_run == 'true' + runs-on: azure-windows-scale-rocm + defaults: + run: + shell: bash + env: + BUILD_DIR: B:\build + VENV_PATH: B:\rocm_wheels + GPU_TARGET: gfx1151 + steps: + - name: Runner health status + shell: pwsh + run: | + Write-Host "=== OS ===" + (Get-CimInstance Win32_OperatingSystem).Caption + Write-Host "=== CPU ===" + $cpu = Get-CimInstance Win32_Processor + Write-Host "$($cpu.Name) ($($cpu.NumberOfLogicalProcessors) logical cores)" + Write-Host "=== Memory ===" + $os = Get-CimInstance Win32_OperatingSystem + $totalGB = [math]::Round($os.TotalVisibleMemorySize / 1MB, 1) + $freeGB = [math]::Round($os.FreePhysicalMemory / 1MB, 1) + Write-Host "Total: ${totalGB} GB Free: ${freeGB} GB" + Write-Host "=== Disk ===" + Get-PSDrive -PSProvider FileSystem | + Select-Object Name, + @{N='Used(GB)';E={[math]::Round($_.Used/1GB,1)}}, + @{N='Free(GB)';E={[math]::Round($_.Free/1GB,1)}}, + Root | + Format-Table -AutoSize | Out-String | Write-Host + + - name: Configure git longpaths + run: git config --global core.longpaths true + + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: | + .dvc + .github + cmake + dnn-providers + projects/hipdnn + projects/miopen + test + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + + - name: Configure MSVC + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Install build tools + run: | + choco source disable -n=chocolatey + choco source add -n=internal -s http://10.0.167.96:8081/repository/choco-group/ --priority=1 + choco install --no-progress -y ccache + choco install --no-progress -y ninja --version 1.12.1 + + - name: Install ROCm wheels + shell: pwsh + run: | + Write-Host "Creating Python virtual environment at $env:VENV_PATH" + python -m venv $env:VENV_PATH + & "$env:VENV_PATH\Scripts\Activate.ps1" + + Write-Host "Installing ROCm wheels from nightlies (target: $env:GPU_TARGET)" + pip install --index-url "https://rocm.nightlies.amd.com/v2/$env:GPU_TARGET/" "rocm[libraries,devel]" + if ($LASTEXITCODE -ne 0) { throw "Failed to install ROCm wheels" } + + Write-Host "Initializing ROCm SDK" + rocm-sdk init + if ($LASTEXITCODE -ne 0) { throw "Failed to initialize ROCm SDK" } + + $RocmSdkPath = "$env:VENV_PATH\Lib\site-packages\_rocm_sdk_devel" -replace '\\', '/' + Write-Host "ROCM_SDK_PATH=$RocmSdkPath" + echo "ROCM_SDK_PATH=$RocmSdkPath" >> $env:GITHUB_ENV + + - name: Configure superbuild + run: | + cmake --preset hipdnn-providers-all -GNinja \ + -DROCM_PATH="$ROCM_SDK_PATH" \ + -DCMAKE_PROGRAM_PATH="$ROCM_SDK_PATH/lib/llvm/bin" \ + -DGPU_TARGETS="$GPU_TARGET" \ + -DROCM_LIBS_ENABLE_ROOT_CTEST=ON \ + -DENABLE_CLANG_FORMAT=OFF \ + -DENABLE_CLANG_TIDY=OFF + + - name: Build + run: cmake --build build + + # Windows tests have a few issues where we are loading plugins and thats causing issues with super build tests + # as they end up trying to init a gpu on a non gpu machine. This work is ticketed to be fixed later. + #- name: Add ROCm bin to PATH + # run: echo "$ROCM_SDK_PATH/bin" >> "$GITHUB_PATH" + + #- name: Run tests + # run: ctest --test-dir build --output-on-failure + + hipdnn-linux-superbuild: + name: HipDNN Linux Superbuild + needs: gate + if: needs.gate.outputs.should_run == 'true' + runs-on: azure-linux-scale-rocm + env: + GPU_TARGET: gfx94X-dcgpu + steps: + - name: Runner health status + run: | + echo "=== OS ===" + cat /etc/os-release | head -2 + echo "=== CPU ===" + lscpu | grep -E 'Model name|^CPU\(s\)' + echo "=== Memory ===" + free -h | grep Mem + echo "=== Disk ===" + df -h / + + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: | + .dvc + .github + cmake + dnn-providers + projects/hipdnn + projects/miopen + test + + - name: Set environment + run: echo "VENV_PATH=${{ runner.temp }}/rocm_wheels" >> "$GITHUB_ENV" + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.12' + + - name: Install build tools + run: | + sudo apt-get update + sudo apt-get install -y ninja-build ccache lsb-release wget software-properties-common gnupg + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 20 + sudo apt-get install -y clang-tidy-20 clang-tools-20 + + - name: Install ROCm wheels + run: | + echo "Creating Python virtual environment at $VENV_PATH" + python -m venv "$VENV_PATH" + source "$VENV_PATH/bin/activate" + + echo "Installing ROCm wheels from nightlies (target: $GPU_TARGET)" + pip install --index-url "https://rocm.nightlies.amd.com/v2/$GPU_TARGET/" "rocm[libraries,devel]" + + echo "Initializing ROCm SDK" + rocm-sdk init + + SITE_PACKAGES=$("$VENV_PATH/bin/python" -c "import site; print(site.getsitepackages()[0])") + ROCM_SDK_PATH="$SITE_PACKAGES/_rocm_sdk_devel" + echo "ROCM_SDK_PATH=$ROCM_SDK_PATH" + echo "ROCM_SDK_PATH=$ROCM_SDK_PATH" >> "$GITHUB_ENV" + + - name: Configure superbuild + run: | + cmake --preset hipdnn-providers-all -GNinja \ + -DROCM_PATH="$ROCM_SDK_PATH" \ + -DCMAKE_PROGRAM_PATH="$ROCM_SDK_PATH/lib/llvm/bin" \ + -DGPU_TARGETS="$GPU_TARGET" \ + -DROCM_LIBS_ENABLE_ROOT_CTEST=ON \ + -DENABLE_CLANG_FORMAT=OFF \ + -DENABLE_CLANG_TIDY=ON \ + -DCMAKE_CXX_CLANG_TIDY=/usr/bin/clang-tidy-20 + + - name: Build + run: cmake --build build + + - name: Run tests + run: ctest --test-dir build --output-on-failure + + hipdnn_build_summary: + name: hipDNN Superbuild Summary + if: always() + needs: + - gate + - hipdnn-windows-superbuild + - hipdnn-linux-superbuild + runs-on: ubuntu-24.04 + steps: + - name: Report results + run: | + echo '${{ toJson(needs) }}' + FAILED_JOBS="$(echo '${{ toJson(needs) }}' \ + | jq --raw-output \ + 'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \ + )" + if [[ "${FAILED_JOBS}" != "" ]]; then + echo "Failed jobs: ${FAILED_JOBS}" + exit 1 + fi diff --git a/.github/workflows/rocisa-ci.yml b/.github/workflows/rocisa-ci.yml index 30c65eb1262..39086582348 100644 --- a/.github/workflows/rocisa-ci.yml +++ b/.github/workflows/rocisa-ci.yml @@ -24,10 +24,25 @@ concurrency: cancel-in-progress: true jobs: + setup: + name: "Setup" + runs-on: ubuntu-24.04 + outputs: + docker-image: ${{ steps.env.outputs.docker-image }} + linux-runner: ${{ steps.env.outputs.linux-runner }} + windows-runner: ${{ steps.env.outputs.windows-runner }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: .github/actions/ci-env + - uses: ./.github/actions/ci-env + id: env + Linux: - runs-on: azure-linux-scale-rocm + needs: setup + runs-on: ${{ needs.setup.outputs.linux-runner }} container: - image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2 + image: ${{ needs.setup.outputs.docker-image }} steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -44,7 +59,8 @@ jobs: test_path: test Windows: - runs-on: azure-windows-scale-rocm + needs: setup + runs-on: ${{ needs.setup.outputs.windows-runner }} defaults: run: shell: bash diff --git a/.github/workflows/stinkytofu-ci.yml b/.github/workflows/stinkytofu-ci.yml index 2da0dea8298..2547dcdd240 100644 --- a/.github/workflows/stinkytofu-ci.yml +++ b/.github/workflows/stinkytofu-ci.yml @@ -22,10 +22,25 @@ concurrency: cancel-in-progress: true jobs: + setup: + name: "Setup" + runs-on: ubuntu-24.04 + outputs: + docker-image: ${{ steps.env.outputs.docker-image }} + linux-runner: ${{ steps.env.outputs.linux-runner }} + windows-runner: ${{ steps.env.outputs.windows-runner }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + sparse-checkout: .github/actions/ci-env + - uses: ./.github/actions/ci-env + id: env + Linux: - runs-on: azure-linux-scale-rocm + needs: setup + runs-on: ${{ needs.setup.outputs.linux-runner }} container: - image: ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2 + image: ${{ needs.setup.outputs.docker-image }} env: ROCM_PATH: /opt/rocm steps: @@ -66,7 +81,8 @@ jobs: test_path: python_module/tests Windows: - runs-on: azure-windows-scale-rocm + needs: setup + runs-on: ${{ needs.setup.outputs.windows-runner }} env: BUILD_DIR: B:\build defaults: diff --git a/.github/workflows/therock-multi-arch-ci-nightly.yml b/.github/workflows/therock-multi-arch-ci-nightly.yml new file mode 100644 index 00000000000..6adcedb5736 --- /dev/null +++ b/.github/workflows/therock-multi-arch-ci-nightly.yml @@ -0,0 +1,30 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +# Nightly trigger for Multi-Arch CI +# +# This workflow triggers therock-multi-arch-ci.yml via workflow_dispatch +# with specific GPU families. This is needed because TheRock's setup_multi_arch +# workflow runs all families when triggered by schedule, ignoring inputs. +# By using workflow_dispatch, TheRock respects our specified families. + +name: TheRock Multi-Arch CI Nightly Trigger + +on: + schedule: + - cron: "0 7 * * *" # Runs nightly at 7 AM UTC + +permissions: + actions: write + contents: read + +jobs: + multi-arch-ci-nightly-trigger: + runs-on: ubuntu-latest + steps: + - name: Trigger Multi-Arch CI with specific GPU families + uses: benc-uk/workflow-dispatch@e2e5e9a103e331dad343f381a29e654aea3cf8fc # v1.2.4 + with: + workflow: therock-multi-arch-ci.yml + ref: develop + inputs: '{ "linux_amdgpu_families": "gfx94X,gfx950", "windows_amdgpu_families": "gfx1151" }' diff --git a/.github/workflows/therock-multi-arch-ci.yml b/.github/workflows/therock-multi-arch-ci.yml index 51e324af729..236e7c70278 100644 --- a/.github/workflows/therock-multi-arch-ci.yml +++ b/.github/workflows/therock-multi-arch-ci.yml @@ -15,8 +15,6 @@ name: TheRock Multi-Arch CI on: - schedule: - - cron: "0 7 * * *" # Runs nightly at 7 AM UTC workflow_dispatch: inputs: linux_amdgpu_families: @@ -57,19 +55,19 @@ concurrency: jobs: setup: - uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom with: build_variant: "release" - # Limit to gfx1151 and gfx94X-dcgpu for rocm-libraries CI - linux_amdgpu_families: ${{ inputs.linux_amdgpu_families || 'gfx1151,gfx94X-dcgpu' }} + # Limit GPU families for rocm-libraries CI + linux_amdgpu_families: ${{ inputs.linux_amdgpu_families || 'gfx94X,gfx950' }} windows_amdgpu_families: ${{ inputs.windows_amdgpu_families || 'gfx1151' }} linux_test_labels: ${{ inputs.linux_test_labels || '' }} windows_test_labels: ${{ inputs.windows_test_labels || '' }} prebuilt_stages: ${{ inputs.prebuilt_stages || '' }} baseline_run_id: ${{ inputs.baseline_run_id || '' }} repository: ROCm/TheRock - ref: 645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 - external_repo_config: '{"repository":"${{ github.repository }}","ref":"${{ github.sha }}","checkout_path":"external-rocm-libraries","source_package":"ROCM_LIBRARIES","fetch_sources_args":"--no-include-rocm-libraries"}' + ref: rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom + external_repo: '{"repository":"${{ github.repository }}","ref":"${{ github.sha }}"}' linux_build_and_test: name: Linux::${{ fromJSON(needs.setup.outputs.linux_build_config || '{}').build_variant_label || 'skip' }} @@ -79,7 +77,7 @@ jobs: needs.setup.outputs.linux_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom secrets: inherit with: build_config: ${{ needs.setup.outputs.linux_build_config }} @@ -88,7 +86,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: 645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + ref: rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom permissions: contents: read id-token: write @@ -101,7 +99,7 @@ jobs: needs.setup.outputs.windows_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom secrets: inherit with: build_config: ${{ needs.setup.outputs.windows_build_config }} @@ -110,7 +108,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: 645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + ref: rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom permissions: contents: read id-token: write @@ -128,7 +126,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 645a35ca59d15254e73348fde32ff31d63eb60b4 # 2026-05-06 + ref: rocm-libraries/multi-arch-ci-custom # TheRock@rocm-libraries/multi-arch-ci-custom sparse-checkout: build_tools/github_actions sparse-checkout-cone-mode: true diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml index 1a7ff4ee32a..af7b774f128 100644 --- a/.github/workflows/update-docs.yml +++ b/.github/workflows/update-docs.yml @@ -73,11 +73,16 @@ jobs: ) for project in $changed_projects; do - slug="advanced-micro-devices-$project" - if echo "$project_list" | grep -Fxq "$slug"; then + + # Load the map and look up the rtd_slug by project name (folder name) + project_slug=$(jq -r --arg folder_name "$project" \ + '.projects[] | select(.folder == $folder_name) | .rtd_slug' \ + .github/docs-config.json) + + if echo "$project_list" | grep -Fxq "$project_slug"; then latest_branch=$(curl -s \ -H "Authorization: Token $RTD_TOKEN" \ - "https://app.readthedocs.com/api/v3/projects/$slug/" \ + "https://app.readthedocs.com/api/v3/projects/$project_slug/" \ | jq -r ".default_branch" \ | tr -d '[:space:]') echo "Latest branch for $project: $latest_branch" @@ -85,7 +90,7 @@ jobs: echo "Detected latest release branch, trigger builds for 'latest' version" response=$(curl -f -s -X POST \ -H "Authorization: Token $RTD_TOKEN" \ - "https://readthedocs.com/api/v3/projects/$slug/versions/latest/builds/") || { + "https://readthedocs.com/api/v3/projects/$project_slug/versions/latest/builds/") || { echo "Failed to trigger RTD build for $project. Response: $response" exit 1 } @@ -94,7 +99,7 @@ jobs: echo "Triggering RTD build for $project, version: $version_slug" response=$(curl -f -s -X POST \ -H "Authorization: Token $RTD_TOKEN" \ - "https://readthedocs.com/api/v3/projects/$slug/versions/$version_slug/builds/") || { + "https://readthedocs.com/api/v3/projects/$project_slug/versions/$version_slug/builds/") || { echo "Failed to trigger RTD build for $project. Response: $response" exit 1 } From 2a9a5d3ddfe58ca9c327c420f5bd6aa6d2293914 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 19 May 2026 18:53:47 -0400 Subject: [PATCH 51/97] Disable clang-format check in Python test CI workflow The runner does not have clang-format installed and we don't need it for building and running tests. Co-Authored-By: Claude Opus 4 --- .github/workflows/hipdnn-python-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index c1b5069f765..66493ac7e96 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -59,6 +59,7 @@ jobs: cmake -B build -GNinja \ -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ + -DENABLE_CLANG_FORMAT=OFF \ -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd From e75ddfcac81d9e494268c8889936caf47f71aa99 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 14:55:32 -0400 Subject: [PATCH 52/97] Switch hipDNN python bindings from setuptools to scikit-build-core Replace setuptools with scikit-build-core as the PEP 517 build backend. Use the SKBUILD variable (set automatically by scikit-build-core) to support dual-mode: standalone pip install and subdirectory cmake builds. - Delete setup.py and build_python_wheel.py glue scripts - Update pyproject.toml to use scikit-build-core backend - Rewrite CMakeLists.txt with SKBUILD dual-mode install rules - SKBUILD: find_package for deps, install to wheel layout - Subdirectory: hipdnn_add_dependency, install to staging area Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 38 +++++----- projects/hipdnn/python/build_python_wheel.py | 80 -------------------- projects/hipdnn/python/pyproject.toml | 15 ++-- projects/hipdnn/python/setup.py | 43 ----------- 4 files changed, 25 insertions(+), 151 deletions(-) delete mode 100755 projects/hipdnn/python/build_python_wheel.py delete mode 100644 projects/hipdnn/python/setup.py diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 128540d7dbf..b4f39c157f6 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -12,8 +12,15 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) -hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) -hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) +if(DEFINED SKBUILD) + list(APPEND CMAKE_PREFIX_PATH /opt/rocm) + find_package(nanobind REQUIRED CONFIG) + find_package(hipdnn_frontend REQUIRED CONFIG) + find_package(hipdnn_backend REQUIRED CONFIG) +else() + hipdnn_add_dependency(tsl-robin-map VERSION ${HIPDNN_TSL_ROBIN_MAP_VERSION}) + hipdnn_add_dependency(nanobind VERSION ${HIPDNN_NANOBIND_VERSION}) +endif() nanobind_add_module( hipdnn_frontend_python @@ -39,20 +46,13 @@ set_target_properties( INSTALL_RPATH_USE_LINK_PATH TRUE ) -install(CODE " - set(_wheel_dir \"\${CMAKE_INSTALL_PREFIX}/share/hipdnn/wheels\") - file(MAKE_DIRECTORY \"\${_wheel_dir}\") - - message(STATUS \"Building hipdnn-frontend wheel\") - execute_process( - COMMAND \"${Python_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/build_python_wheel.py\" - --source-dir \"${CMAKE_CURRENT_SOURCE_DIR}\" - --ext-dir \"${CMAKE_CURRENT_BINARY_DIR}\" - --wheel-dir \"\${_wheel_dir}\" - RESULT_VARIABLE _result - ) - if(_result) - message(FATAL_ERROR \"Failed to build hipdnn-frontend wheel\") - endif() -") +if(DEFINED SKBUILD) + install(TARGETS hipdnn_frontend_python DESTINATION hipdnn_frontend) + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/__init__.py" + DESTINATION hipdnn_frontend) +else() + set(_staging "share/hipdnn/python/hipdnn_frontend") + install(TARGETS hipdnn_frontend_python DESTINATION "${_staging}") + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/__init__.py" + DESTINATION "${_staging}") +endif() diff --git a/projects/hipdnn/python/build_python_wheel.py b/projects/hipdnn/python/build_python_wheel.py deleted file mode 100755 index fe5e03797ca..00000000000 --- a/projects/hipdnn/python/build_python_wheel.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -import argparse -import os -from pathlib import Path -import shutil -import subprocess -import sys - - -def main() -> int: - parser = argparse.ArgumentParser( - description="Build the hipDNN frontend Python wheel." - ) - parser.add_argument( - "--source-dir", - type=Path, - required=True, - help="Path to projects/hipdnn/python", - ) - parser.add_argument( - "--ext-dir", - type=Path, - required=True, - help="Directory containing the pre-built hipdnn_frontend_python extension", - ) - parser.add_argument( - "--wheel-dir", - type=Path, - required=True, - help="Directory where the built wheel will be written", - ) - args = parser.parse_args() - - source_dir = args.source_dir.resolve() - ext_dir = args.ext_dir.resolve() - wheel_dir = args.wheel_dir.resolve() - - if not (source_dir / "pyproject.toml").exists(): - raise RuntimeError(f"Missing pyproject.toml in {source_dir}") - - import glob - - if not glob.glob(str(ext_dir / "hipdnn_frontend_python*")): - raise RuntimeError(f"No hipdnn_frontend_python extension found in {ext_dir}") - - shutil.rmtree(wheel_dir, ignore_errors=True) - wheel_dir.mkdir(parents=True, exist_ok=True) - - env = os.environ.copy() - env["HIPDNN_EXT_DIR"] = str(ext_dir) - - cmd = [ - sys.executable, - "-m", - "build", - "--wheel", - "--no-isolation", - "--outdir", - str(wheel_dir), - str(source_dir), - ] - - print("::: Building hipdnn-frontend wheel") - print(f"::: HIPDNN_EXT_DIR={ext_dir}") - print("::: " + " ".join(cmd)) - subprocess.check_call(cmd, cwd=source_dir, env=env) - - wheels = sorted(wheel_dir.glob("hipdnn_frontend-*.whl")) - if not wheels: - raise RuntimeError(f"No hipdnn_frontend wheel produced in {wheel_dir}") - - print(f"::: Built wheel: {wheels[-1]}") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index e43c2164cb9..ae61202e3df 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,15 +2,15 @@ # SPDX-License-Identifier: MIT [build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" +requires = ["scikit-build-core>=0.10", "nanobind>=2.0"] +build-backend = "scikit_build_core.build" [project] name = "hipdnn-frontend" version = "0.1.0" description = "Python bindings for the hipDNN frontend library" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "MIT"} authors = [ {name = "Advanced Micro Devices, Inc."}, @@ -20,7 +20,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -38,8 +37,6 @@ dev = [ "numpy", ] -[tool.setuptools.packages.find] -include = ["hipdnn_frontend"] - -[tool.cibuildwheel] -skip = ["pp*", "cp36-*", "cp37-*"] +[tool.scikit-build] +cmake.build-type = "Release" +wheel.packages = [] diff --git a/projects/hipdnn/python/setup.py b/projects/hipdnn/python/setup.py deleted file mode 100644 index 73d6bf9969b..00000000000 --- a/projects/hipdnn/python/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -from __future__ import annotations - -import glob -import os -import shutil -from pathlib import Path - -from setuptools import setup -from setuptools.command.build_py import build_py - - -class BuildPyWithExtension(build_py): - """Copy pre-built nanobind extension into the package before building.""" - - def run(self): - super().run() - - ext_dir = os.environ.get("HIPDNN_EXT_DIR", "") - if not ext_dir: - raise RuntimeError( - "HIPDNN_EXT_DIR environment variable is not set. " - "It must point to the directory containing the pre-built " - "hipdnn_frontend_python extension." - ) - - ext_path = Path(ext_dir) - extensions = glob.glob(str(ext_path / "hipdnn_frontend_python*")) - if not extensions: - raise RuntimeError( - f"No hipdnn_frontend_python extension found in {ext_path}" - ) - - pkg_dir = Path(self.build_lib) / "hipdnn_frontend" - pkg_dir.mkdir(parents=True, exist_ok=True) - - for ext in extensions: - shutil.copy2(ext, pkg_dir / Path(ext).name) - - -setup(cmdclass={"build_py": BuildPyWithExtension}) From 554980472d70fba1f2c1b7612672ad3bfe6f2fbb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 15:14:32 -0400 Subject: [PATCH 53/97] Update Python bindings README for scikit-build-core Update prerequisites (CMake 3.18, Python 3.9), build instructions to use scikit-build-core instead of setuptools, and project structure descriptions to reflect the current file layout. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/README.md | 41 +++++++++++++------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/projects/hipdnn/python/README.md b/projects/hipdnn/python/README.md index ef17bdacfbf..a8294ffa5b3 100644 --- a/projects/hipdnn/python/README.md +++ b/projects/hipdnn/python/README.md @@ -20,22 +20,22 @@ python │ ├── attributes_bindings.cpp # Bindings for attribute classes │ └── types_bindings.cpp # Bindings for custom types and enums ├── hipdnn_frontend -│ ├── __init__.py # Initializes the hipdnn_frontend package +│ ├── __init__.py # Initializes the hipdnn_frontend package │ └── samples -│ ├── bn_inference.py # Batch normalization inference sample(DISABLED) -│ ├── conv_fprop.py # Convolution forward propagation sample -│ ├── conv_dgrad.py # Convolution backward data gradient sample -│ └── conv_wgrad.py # Convolution backward weight gradient sample -├── CMakeLists.txt # CMake configuration file -├── pyproject.toml # Python project configuration +│ ├── bn_inference.py # Batch normalization inference sample (DISABLED) +│ ├── conv_fprop.py # Convolution forward propagation sample +│ ├── conv_dgrad.py # Convolution backward data gradient sample +│ └── conv_wgrad.py # Convolution backward weight gradient sample +├── CMakeLists.txt # CMake configuration (scikit-build-core + subdirectory dual-mode) +├── pyproject.toml # Python project configuration (scikit-build-core backend) └── README.md # Project documentation ``` ## Prerequisites -- CMake 3.15 or higher +- CMake 3.18 or higher - A C++ compiler with C++17 support (e.g. clang++) -- Python 3.8 or higher +- Python 3.9 or higher - ROCm/HIP runtime and libraries - hipDNN frontend library (built and installed) @@ -61,35 +61,28 @@ pip install --upgrade pip ### 2. Building and Installing the Python Bindings -The Python bindings use setuptools to handle the build process automatically through pip: +The Python bindings use [scikit-build-core](https://scikit-build-core.readthedocs.io/) as the build backend, which drives CMake automatically through pip. + +hipDNN must be installed (e.g. at `/opt/rocm`) before building the bindings: ```bash # Navigate to the hipdnn python directory cd python -export CMAKE_PREFIX_PATH=/path/to/hipdnn/install:$CMAKE_PREFIX_PATH pip install -v . - -or - -pip install -v . --config-settings=cmake.define.CMAKE_PREFIX_PATH=/path/to/hipdnn/install ``` -### 3. Development Installation +If hipDNN is installed somewhere other than `/opt/rocm`, pass the prefix: -For development work where you want to rebuild and apply the changes you have two options: - -#### Editable Installation ```bash -# from within the hipdnn python directory -pip install -e . +pip install -v . -Ccmake.define.CMAKE_PREFIX_PATH=/path/to/hipdnn/install ``` -#### Uninstall and Reinstall Flow -Instead of doing an editable install, if you prefer a full reinstall after C++ changes: +### 3. Development Installation + +After C++ changes, uninstall and reinstall: ```bash -# from within the hipdnn python directory pip uninstall hipdnn-frontend -y pip install -v . ``` From ddf98f1423391197dedb51360329cd73fb723b4f Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 15:24:55 -0400 Subject: [PATCH 54/97] Add cmake to python bindings build requirements Restore cmake>=3.18 as a build dependency so pip installs it automatically in environments without a system cmake. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index ae61202e3df..4ebe4b91154 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MIT [build-system] -requires = ["scikit-build-core>=0.10", "nanobind>=2.0"] +requires = ["scikit-build-core>=0.10", "nanobind>=2.0", "cmake>=3.18"] build-backend = "scikit_build_core.build" [project] From 2221111dc00fa25c1d094a13606c552f974b251b Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 16:07:54 -0400 Subject: [PATCH 55/97] Address PR review feedback for python bindings packaging - Add DOWNLOAD_EXTRACT_TIMESTAMP TRUE to nanobind FetchContent for CMake >= 3.24 consistency - Make ROCM_PATH configurable in standalone SKBUILD path instead of hardcoding /opt/rocm - Enable HIPDNN_BUILD_PYTHON_BINDINGS in clang-tidy CI so python3-dev dependency is used - Tighten nanobind version floor in pyproject.toml to >=2.12.0 to match FetchContent pin Co-Authored-By: Claude Opus 4 --- .github/workflows/clang-tidy.yml | 3 ++- projects/hipdnn/cmake/Dependencies.cmake | 1 + projects/hipdnn/python/CMakeLists.txt | 5 ++++- projects/hipdnn/python/pyproject.toml | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 4179cc7b8e8..2af00fec7fe 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -99,7 +99,8 @@ jobs: -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DENABLE_CLANG_FORMAT=OFF \ - -DHIP_PLATFORM=amd + -DHIP_PLATFORM=amd \ + -DHIPDNN_BUILD_PYTHON_BINDINGS=ON ninja -C build-hipdnn tidy # Disabled: see https://github.com/ROCm/rocm-libraries/issues/5067 diff --git a/projects/hipdnn/cmake/Dependencies.cmake b/projects/hipdnn/cmake/Dependencies.cmake index a28a16e0cb8..ec4c233d381 100644 --- a/projects/hipdnn/cmake/Dependencies.cmake +++ b/projects/hipdnn/cmake/Dependencies.cmake @@ -273,6 +273,7 @@ function(_fetch_nanobind VERSION HASH) nanobind GIT_REPOSITORY https://github.com/wjakob/nanobind.git GIT_TAG v${VERSION} + DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) fetchcontent_makeavailable(nanobind) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index b4f39c157f6..eac12ff1709 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -13,7 +13,10 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) if(DEFINED SKBUILD) - list(APPEND CMAKE_PREFIX_PATH /opt/rocm) + if(NOT DEFINED ROCM_PATH) + set(ROCM_PATH "/opt/rocm") + endif() + list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}") find_package(nanobind REQUIRED CONFIG) find_package(hipdnn_frontend REQUIRED CONFIG) find_package(hipdnn_backend REQUIRED CONFIG) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index 4ebe4b91154..dec4eef64b0 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MIT [build-system] -requires = ["scikit-build-core>=0.10", "nanobind>=2.0", "cmake>=3.18"] +requires = ["scikit-build-core>=0.10", "nanobind>=2.12.0", "cmake>=3.18"] build-backend = "scikit_build_core.build" [project] From 6bc2aeaaf41f1eaf541011f3edb1f816750917fb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 16:15:41 -0400 Subject: [PATCH 56/97] Guard project() and cmake settings behind SKBUILD in python CMakeLists - Wrap cmake_minimum_required, project(), and C++ standard settings in if(DEFINED SKBUILD) to avoid resetting CMAKE_INSTALL_PREFIX when used as a subdirectory - Fix DOWNLOAD_EXTRACT_TIMESTAMP formatting in tsl-robin-map for consistency Co-Authored-By: Claude Opus 4 --- projects/hipdnn/cmake/Dependencies.cmake | 3 +-- projects/hipdnn/python/CMakeLists.txt | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/projects/hipdnn/cmake/Dependencies.cmake b/projects/hipdnn/cmake/Dependencies.cmake index ec4c233d381..78609232f19 100644 --- a/projects/hipdnn/cmake/Dependencies.cmake +++ b/projects/hipdnn/cmake/Dependencies.cmake @@ -255,8 +255,7 @@ function(_fetch_tsl-robin-map VERSION HASH) tsl-robin-map GIT_REPOSITORY https://github.com/Tessil/robin-map.git GIT_TAG v${VERSION} - DOWNLOAD_EXTRACT_TIMESTAMP - TRUE + DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) fetchcontent_makeavailable(tsl-robin-map) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index eac12ff1709..3eacaa56cee 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -1,15 +1,15 @@ # Copyright © Advanced Micro Devices, Inc., or its affiliates. # SPDX-License-Identifier: MIT -cmake_minimum_required(VERSION 3.18) +if(DEFINED SKBUILD) + cmake_minimum_required(VERSION 3.18) + project(hipdnn_python_bindings LANGUAGES CXX) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() set(CMAKE_POSITION_INDEPENDENT_CODE ON) -project(hipdnn_python_bindings LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) if(DEFINED SKBUILD) From 3a69eed45925061b46ca222913d45e5fc2b988ab Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 16:48:30 -0400 Subject: [PATCH 57/97] Use repo-standard ROCM_PATH detection pattern in python CMakeLists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse the ENV{ROCM_PATH} → CACHE fallback pattern used across the repo's toolchain files instead of a custom check. Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 3eacaa56cee..983d0f7a6fd 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -13,9 +13,10 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) if(DEFINED SKBUILD) - if(NOT DEFINED ROCM_PATH) - set(ROCM_PATH "/opt/rocm") + if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Path to the ROCm installation.") endif() + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to the ROCm installation.") list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}") find_package(nanobind REQUIRED CONFIG) find_package(hipdnn_frontend REQUIRED CONFIG) From f4f0e4b171f6cea29534df9a99e303dbe9733a38 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 17:16:03 -0400 Subject: [PATCH 58/97] Update Python bindings README for review feedback - Add subdirectory build section documenting HIPDNN_BUILD_PYTHON_BINDINGS - Add bindings.hpp and test/ to project structure - Collapse samples into single line, add matmul sample - Move hipDNN install prerequisite to standalone build section - Remove verbose per-sample descriptions Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/README.md | 74 +++++++++++++------------------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/projects/hipdnn/python/README.md b/projects/hipdnn/python/README.md index a8294ffa5b3..724be7eaedd 100644 --- a/projects/hipdnn/python/README.md +++ b/projects/hipdnn/python/README.md @@ -7,12 +7,11 @@ This project provides Python bindings for the hipDNN frontend library using the ## Project Structure -The project is organized as follows: - ``` python ├── src │ ├── module.cpp # Main entry point for the nanobind module +│ ├── bindings.hpp # Shared declarations for binding functions │ ├── graph_bindings.cpp # Bindings for the Graph class and its methods │ ├── handle_bindings.cpp # Bindings for handle management │ ├── memory_bindings.cpp # Bindings for device memory management @@ -21,11 +20,8 @@ python │ └── types_bindings.cpp # Bindings for custom types and enums ├── hipdnn_frontend │ ├── __init__.py # Initializes the hipdnn_frontend package -│ └── samples -│ ├── bn_inference.py # Batch normalization inference sample (DISABLED) -│ ├── conv_fprop.py # Convolution forward propagation sample -│ ├── conv_dgrad.py # Convolution backward data gradient sample -│ └── conv_wgrad.py # Convolution backward weight gradient sample +│ ├── samples/ # Sample scripts (conv_fprop, conv_dgrad, conv_wgrad, matmul) +│ └── test/ # Tests for the Python bindings ├── CMakeLists.txt # CMake configuration (scikit-build-core + subdirectory dual-mode) ├── pyproject.toml # Python project configuration (scikit-build-core backend) └── README.md # Project documentation @@ -37,11 +33,28 @@ python - A C++ compiler with C++17 support (e.g. clang++) - Python 3.9 or higher - ROCm/HIP runtime and libraries -- hipDNN frontend library (built and installed) -## Getting Started +## Building + +There are two ways to build the Python bindings: + +### Subdirectory build (via parent hipDNN CMake) -### 1. Setting up a Python Virtual Environment +When building hipDNN from the project root, enable the `HIPDNN_BUILD_PYTHON_BINDINGS` option (off by default): + +```bash +cmake -S projects/hipdnn -B build -GNinja -DHIPDNN_BUILD_PYTHON_BINDINGS=ON +cmake --build build +cmake --install build --prefix /path/to/install +``` + +The bindings are staged to `share/hipdnn/python/hipdnn_frontend/` under the install prefix. + +### Standalone build (via pip) + +hipDNN must already be built and installed (e.g. at `/opt/rocm`). + +#### 1. Setting up a Python Virtual Environment It's recommended to use a Python virtual environment to isolate the project dependencies: @@ -59,11 +72,7 @@ source hipdnn_env/bin/activate pip install --upgrade pip ``` -### 2. Building and Installing the Python Bindings - -The Python bindings use [scikit-build-core](https://scikit-build-core.readthedocs.io/) as the build backend, which drives CMake automatically through pip. - -hipDNN must be installed (e.g. at `/opt/rocm`) before building the bindings: +#### 2. Building and Installing ```bash # Navigate to the hipdnn python directory @@ -78,7 +87,7 @@ If hipDNN is installed somewhere other than `/opt/rocm`, pass the prefix: pip install -v . -Ccmake.define.CMAKE_PREFIX_PATH=/path/to/hipdnn/install ``` -### 3. Development Installation +#### 3. Development Installation After C++ changes, uninstall and reinstall: @@ -87,34 +96,13 @@ pip uninstall hipdnn-frontend -y pip install -v . ``` -### 4. Running the Sample Applications - -The repository includes several sample applications demonstrating different operations: - -#### Convolution Forward Propagation -```bash -python conv_fprop.py -``` - -This sample demonstrates: -- Setting up a convolution forward pass -- Configuring padding, stride, and dilation parameters -- Executing the convolution and displaying results +## Running the Samples -#### Convolution Backward Data Gradient -```bash -python conv_dgrad.py -``` +Sample scripts are located in `hipdnn_frontend/samples/`: -This sample demonstrates: -- Computing input gradients (dx) given output gradients (dy) and weights -- Used in backpropagation for training neural networks - -#### Convolution Backward Weight Gradient ```bash -python conv_wgrad.py +python hipdnn_frontend/samples/conv_fprop.py +python hipdnn_frontend/samples/conv_dgrad.py +python hipdnn_frontend/samples/conv_wgrad.py +python hipdnn_frontend/samples/matmul.py ``` - -This sample demonstrates: -- Computing weight gradients (dw) given output gradients (dy) and input (x) -- Used for updating convolution filter weights during training From 6ef9fd87f4aa2a50e01ceb16695c28d59a0ecb2d Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 20 May 2026 17:19:22 -0400 Subject: [PATCH 59/97] Improve HIPDNN_BUILD_PYTHON_BINDINGS description in README Co-Authored-By: Claude Opus 4 --- projects/hipdnn/python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/python/README.md b/projects/hipdnn/python/README.md index 724be7eaedd..89f2b58f055 100644 --- a/projects/hipdnn/python/README.md +++ b/projects/hipdnn/python/README.md @@ -40,7 +40,7 @@ There are two ways to build the Python bindings: ### Subdirectory build (via parent hipDNN CMake) -When building hipDNN from the project root, enable the `HIPDNN_BUILD_PYTHON_BINDINGS` option (off by default): +Set `HIPDNN_BUILD_PYTHON_BINDINGS=ON` to compile the nanobind extension module alongside hipDNN and stage it for packaging. It is off by default because it requires Python development headers and fetches nanobind/tsl-robin-map as additional dependencies. ```bash cmake -S projects/hipdnn -B build -GNinja -DHIPDNN_BUILD_PYTHON_BINDINGS=ON From aeb90a5e11a6fd204be0759b830c7cd4ff5795bd Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 21 May 2026 18:38:26 -0400 Subject: [PATCH 60/97] Preload hipDNN backend library via rocm_sdk for wheel installs When installed via ROCm wheels, libhipdnn_backend.so lives in a separate package directory outside LD_LIBRARY_PATH. Use rocm_sdk.preload_libraries to load it with RTLD_GLOBAL before importing the extension module. Co-Authored-By: Claude Opus 4 --- .../hipdnn/python/hipdnn_frontend/__init__.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/projects/hipdnn/python/hipdnn_frontend/__init__.py b/projects/hipdnn/python/hipdnn_frontend/__init__.py index 3f0ab0c6a65..4c65d8e85d7 100644 --- a/projects/hipdnn/python/hipdnn_frontend/__init__.py +++ b/projects/hipdnn/python/hipdnn_frontend/__init__.py @@ -9,6 +9,26 @@ high-level Python interface. """ +# Preload hipDNN backend library when installed via ROCm wheels. +# The Python extension (hipdnn_frontend_python.so) depends on libhipdnn_backend.so +# which lives in a separate wheel package directory — not on LD_LIBRARY_PATH. +# rocm_sdk.preload_libraries loads it with RTLD_GLOBAL so the extension finds it. +try: + import rocm_sdk +except ImportError: + rocm_sdk = None + +if rocm_sdk is not None: + try: + rocm_sdk.preload_libraries("hipdnn") + except Exception as e: + raise ImportError( + "Failed to preload libhipdnn_backend.so via rocm_sdk. " + "Ensure the hipDNN library package is installed " + "(e.g., pip install rocm[libraries]).\n" + f"Original error: {e}" + ) from e + # Import everything from the compiled extension module try: # The compiled extension module From a74e562d8e726e77976d9617aca1c0ae62b93a5d Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 16:01:53 +0000 Subject: [PATCH 61/97] Document TheRock CI integration in ai-rules.md Adds a CI System section describing TheRock as hipDNN's CI build system, the TheRock hash pin location, the CI build flow, the 3rd-party dependency constraints (no FetchContent from GitHub), the CI workflow files, and the hipDNN CMake flags set by therock_matrix.py. Co-Authored-By: Claude Opus 4.7 --- projects/hipdnn/docs/ai-rules.md | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/projects/hipdnn/docs/ai-rules.md b/projects/hipdnn/docs/ai-rules.md index d4c2ba25d54..5f8f6eef6d8 100644 --- a/projects/hipdnn/docs/ai-rules.md +++ b/projects/hipdnn/docs/ai-rules.md @@ -150,6 +150,63 @@ When requested to build/test: --- +## CI System (TheRock) + +hipDNN CI runs through [TheRock](https://github.com/ROCm/TheRock), AMD's unified ROCm build system. GitHub Actions check out TheRock at a pinned commit hash, then TheRock builds and tests hipDNN as one of its components. + +### TheRock Hash Pin + +The TheRock commit hash is pinned in `.github/actions/ci-env/action.yml` (the `therock-ref` output). All CI workflows read from this single source of truth. When updating the TheRock pin, also update the inline hash in `.github/workflows/therock-test-packages.yml` and `.github/workflows/therock-test-component.yml` which have secondary copies. + +### CI Build Flow + +1. **Checkout**: GitHub Actions checks out TheRock at the pinned hash into a `TheRock/` subdirectory alongside the rocm-libraries checkout. +2. **DVC pull**: `dvc pull -v` hydrates large binary files (MIOpen kernel DBs, benchmarking workloads) from the S3 remote (`s3://therock-dvc/rocm-libraries`). +3. **Source fetch**: `TheRock/build_tools/fetch_sources.py` pre-fetches all third-party sources TheRock needs. This runs before CMake configure so the build is fully offline afterward. +4. **Configure**: CMake configures with `THEROCK_ROCM_LIBRARIES_SOURCE_DIR` pointing to the local rocm-libraries checkout and project-specific flags (e.g., `THEROCK_ENABLE_HIPDNN_INTEGRATION_TESTS`). +5. **Build**: `cmake --build TheRock/build --target therock-archives therock-dist`. +6. **Upload**: Built artifacts are uploaded to S3 so GPU test runners can download them. +7. **Test**: A separate GPU runner downloads the artifacts and runs component test scripts inside a container. + +### 3rd Party Dependencies and FetchContent + +**All 3rd party dependencies are pre-staged from S3 in CI — do not add CMake `FetchContent` calls that download from GitHub at configure time.** TheRock's `fetch_sources.py` handles dependency fetching before CMake runs. A `FetchContent` that hits GitHub during the build will fail in CI. + +hipDNN's `cmake/Dependencies.cmake` has a `HIPDNN_NO_DOWNLOAD` option that sets `FETCHCONTENT_FULLY_DISCONNECTED`. Dependencies declared via `FetchContent` (GTest, flatbuffers, spdlog, nlohmann_json, tsl-robin-map, nanobind) must also be findable as pre-installed packages so TheRock can supply them. + +When adding a new dependency: +- Add it to `cmake/Dependencies.cmake` with `find_package()` first, `FetchContent` as fallback +- Coordinate with TheRock to ensure the dependency is included in `fetch_sources.py` +- The S3 DVC remote (`s3://therock-dvc/rocm-libraries`) stores large binary assets, not source dependencies + +### CI Workflow Files + +| Workflow | Purpose | +|----------|---------| +| `therock-ci.yml` | Top-level orchestrator for PR/push CI | +| `therock-ci-linux.yml` | Reusable Linux build+test workflow | +| `therock-ci-windows.yml` | Reusable Windows build+test workflow | +| `therock-test-packages.yml` | Per-component test matrix fan-out | +| `therock-test-component.yml` | Individual test component with sharding | +| `hipdnn-superbuild-ci.yml` | Superbuild CI using pre-built ROCm wheels (no TheRock source build) | +| `hipdnn-python-tests.yml` | Python binding tests | + +### hipDNN CMake Flags in TheRock CI + +These flags are set by `.github/scripts/therock_matrix.py` when hipDNN changes are detected: + +``` +THEROCK_ENABLE_HIPBLASLTPROVIDER=ON +THEROCK_ENABLE_HIPKERNELPROVIDER=ON +THEROCK_ENABLE_MIOPENPROVIDER=ON +THEROCK_ENABLE_HIPDNN_SAMPLES=ON +THEROCK_ENABLE_COMPOSABLE_KERNEL=ON +THEROCK_ENABLE_HIPDNN_INTEGRATION_TESTS=ON +THEROCK_ENABLE_IREE_LIBS=ON +``` + +--- + ## C++ Code Style ### Naming Conventions From 4ba5d60ee23e5439d3ebe6ab518c4135bfa73fd4 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 16:53:07 +0000 Subject: [PATCH 62/97] [hipDNN] Fix clang-tidy findings in backend and data_sdk - EnginePluginResourceManager dtor: mark cleanup lambda noexcept and wrap log calls / iteration in try/catch so the destructor cannot throw (bugprone-exception-escape). - PlatformUtils.linux.hpp: make dlsym() result pointer const (misc-const-correctness). - GraphDescriptor::toString: use char literal '}' instead of string literal "}" (performance-faster-string-find). --- .../src/descriptors/GraphDescriptor.cpp | 2 +- .../plugin/EnginePluginResourceManager.cpp | 60 ++++++++++++------- .../utilities/PlatformUtils.linux.hpp | 2 +- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp index 9003cce2868..bcd0d9bf64b 100644 --- a/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp @@ -505,7 +505,7 @@ std::string GraphDescriptor::toString() const str += _handle != nullptr ? fmt::format("{:p}", static_cast(_handle)) : "null"; str += ", name=" + (_name.empty() ? std::string("(empty)") : _name); str += ", serializedGraphSize=" + std::to_string(_graphSerializedBuffer.size()); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index eaaea25272c..c4a44fd2df0 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -280,30 +280,48 @@ EnginePluginResourceManager::EnginePluginResourceManager(std::shared_ptrdestroyHandle(handle); - } - catch(const std::exception& e) - { - HIPDNN_BACKEND_LOG_WARN("Failed to destroy handle for plugin '{}' during cleanup: {}", - plugin->name(), - e.what()); - } - catch(...) + auto safeDestroyHandle + = [](const EnginePlugin* plugin, hipdnnEnginePluginHandle_t handle) noexcept { + try + { + plugin->destroyHandle(handle); + } + catch(const std::exception& e) + { + try + { + HIPDNN_BACKEND_LOG_WARN( + "Failed to destroy handle for plugin '{}' during cleanup: {}", + plugin->name(), + e.what()); + } + catch(...) + { + } + } + catch(...) + { + try + { + HIPDNN_BACKEND_LOG_WARN( + "Failed to destroy handle for plugin '{}' during cleanup: unknown error", + plugin->name()); + } + catch(...) + { + } + } + }; + + try + { + for(const auto& [handle, plugin] : _handleToPlugin) { - HIPDNN_BACKEND_LOG_WARN( - "Failed to destroy handle for plugin '{}' during cleanup: unknown error", - plugin->name()); + safeDestroyHandle(plugin, handle); } - }; - - // Destroy plugin handles - for(const auto& [handle, plugin] : _handleToPlugin) + } + catch(...) { - safeDestroyHandle(plugin, handle); } } diff --git a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp index be08fb995e7..accd99aa3b1 100644 --- a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp +++ b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp @@ -96,7 +96,7 @@ inline void* getSymbol(SharedLibraryHandle handle, const char* symbolName) inline std::filesystem::path getLoadedLibraryDirectoryForSymbol(const char* symbolName) { auto _ = dlerror(); - void* symbol = dlsym(RTLD_DEFAULT, symbolName); + void* const symbol = dlsym(RTLD_DEFAULT, symbolName); const char* error = dlerror(); if(error != nullptr) { From 8b885f36722b0f02b8e69c7f603cd698fe0f863d Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 17:08:13 +0000 Subject: [PATCH 63/97] [hipDNN] Simplify EnginePluginResourceManager dtor exception handling Remove the noexcept marker from the inner cleanup lambda and the nested try/catch around each log call. Per-iteration try/catch around safeDestroyHandle keeps cleanup of remaining handles going if a log call throws, and keeps the destructor from escaping an exception (bugprone-exception-escape). --- .../plugin/EnginePluginResourceManager.cpp | 60 +++++++------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index c4a44fd2df0..f51e97445f2 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -280,48 +280,34 @@ EnginePluginResourceManager::EnginePluginResourceManager(std::shared_ptrdestroyHandle(handle); - } - catch(const std::exception& e) - { - try - { - HIPDNN_BACKEND_LOG_WARN( - "Failed to destroy handle for plugin '{}' during cleanup: {}", - plugin->name(), - e.what()); - } - catch(...) - { - } - } - catch(...) - { - try - { - HIPDNN_BACKEND_LOG_WARN( - "Failed to destroy handle for plugin '{}' during cleanup: unknown error", - plugin->name()); - } - catch(...) - { - } - } - }; + auto safeDestroyHandle = [](const EnginePlugin* plugin, hipdnnEnginePluginHandle_t handle) { + try + { + plugin->destroyHandle(handle); + } + catch(const std::exception& e) + { + HIPDNN_BACKEND_LOG_WARN("Failed to destroy handle for plugin '{}' during cleanup: {}", + plugin->name(), + e.what()); + } + catch(...) + { + HIPDNN_BACKEND_LOG_WARN( + "Failed to destroy handle for plugin '{}' during cleanup: unknown error", + plugin->name()); + } + }; - try + for(const auto& [handle, plugin] : _handleToPlugin) { - for(const auto& [handle, plugin] : _handleToPlugin) + try { safeDestroyHandle(plugin, handle); } - } - catch(...) - { + catch(...) + { + } } } From b46a0833fc56c0137277be859e0750bbd0d8d740 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 17:08:41 +0000 Subject: [PATCH 64/97] [hipDNN] Document why try/catch needed around safeDestroyHandle --- .../hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index f51e97445f2..95a13130197 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -301,6 +301,10 @@ EnginePluginResourceManager::~EnginePluginResourceManager() for(const auto& [handle, plugin] : _handleToPlugin) { + // Destructors are implicitly noexcept: a log call inside safeDestroyHandle + // (fmt formatting, sink I/O, allocation) can throw, which would call + // std::terminate. Swallow per-iteration so remaining handles still get + // cleaned up. try { safeDestroyHandle(plugin, handle); From 22eaa0b36a9775ffa9356a0be1b279b49140beee Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 17:09:56 +0000 Subject: [PATCH 65/97] [hipDNN] Restore lambda and loop section comments in dtor --- .../hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index 95a13130197..c81b0effe75 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -280,6 +280,7 @@ EnginePluginResourceManager::EnginePluginResourceManager(std::shared_ptr Date: Tue, 26 May 2026 17:10:27 +0000 Subject: [PATCH 66/97] [hipDNN] Trim dtor exception comment --- .../hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index c81b0effe75..f482eb412fc 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -304,9 +304,7 @@ EnginePluginResourceManager::~EnginePluginResourceManager() for(const auto& [handle, plugin] : _handleToPlugin) { // Destructors are implicitly noexcept: a log call inside safeDestroyHandle - // (fmt formatting, sink I/O, allocation) can throw, which would call - // std::terminate. Swallow per-iteration so remaining handles still get - // cleaned up. + // (fmt formatting, sink I/O, allocation) can throw. try { safeDestroyHandle(plugin, handle); From d7c423517487cd517f7bbfc81fff189a9ab37cff Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 17:20:51 +0000 Subject: [PATCH 67/97] [hipDNN] Suppress bugprone-empty-catch on dtor cleanup loop clang-tidy bugprone-empty-catch -> warning treated as error in CI. The catch is intentionally empty: re-logging from here would recurse into the same throwing fmt/sink path the outer catch is guarding against. --- .../hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index f482eb412fc..652f348a4e6 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -309,6 +309,7 @@ EnginePluginResourceManager::~EnginePluginResourceManager() { safeDestroyHandle(plugin, handle); } + // NOLINTNEXTLINE(bugprone-empty-catch): re-logging here would recurse into the same throw catch(...) { } From 22175f72ef2a78a0bfe9a72947f34ea0e4c58271 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 14:19:00 -0400 Subject: [PATCH 68/97] [hipDNN] Address additional clang-tidy findings - PlatformUtils.linux.hpp: add pointee const to dlsym result (misc-const-correctness) - EngineDescriptor.cpp: use char literal '}' for string append (performance-faster-string-find) - MiopenBatchnormFwdTrainingPlan.cpp: hoist prev/next running mean/variance pointers to void* const via ternary (misc-const-correctness) - MiopenBatchnormBwdPlan.cpp: hoist biasPtr to void* const via ternary (misc-const-correctness) --- .../engines/plans/MiopenBatchnormBwdPlan.cpp | 12 +++-- .../plans/MiopenBatchnormFwdTrainingPlan.cpp | 49 ++++++++++--------- .../src/descriptors/EngineDescriptor.cpp | 2 +- .../utilities/PlatformUtils.linux.hpp | 2 +- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp index 35f9bec1491..a8d3412299d 100644 --- a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp +++ b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp @@ -197,16 +197,18 @@ void BatchnormBwdPlan::execute(const HipdnnMiopenHandle& handle, } // For non-fused case, scale descriptor and bias descriptor are equivalent + const bool fused = _params.optActivation().has_value() && _params.optBias().has_value(); miopenTensorDescriptor_t biasDescriptor = _params.scale().tensorDescriptor(); - void* biasPtr = nullptr; miopenActivationDescriptor_t activationDescriptor = nullptr; + void* const biasPtr = fused ? miopen_utils::findDeviceBuffer(_params.optBias().value().uid(), + deviceBuffers, + numDeviceBuffers) + .ptr + : nullptr; - if(_params.optActivation().has_value() && _params.optBias().has_value()) + if(fused) { - auto biasBuffer = miopen_utils::findDeviceBuffer( - _params.optBias().value().uid(), deviceBuffers, numDeviceBuffers); biasDescriptor = _params.optBias().value().tensorDescriptor(); - biasPtr = biasBuffer.ptr; activationDescriptor = _params.optActivation().value().activationDescriptor(); } diff --git a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp index 5dbf487a96b..282c67bfeaf 100644 --- a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp +++ b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp @@ -310,30 +310,31 @@ void BatchnormFwdTrainingPlan::execute(const HipdnnMiopenHandle& handle, savedVarDesc = _trainingParams.invVariance().tensorDescriptor(); } - void* prevRunningMeanPtr = nullptr; - void* prevRunningVariancePtr = nullptr; - void* nextRunningMeanPtr = nullptr; - void* nextRunningVariancePtr = nullptr; - - if(_trainingParams.hasRunningStats()) - { - prevRunningMeanPtr = miopen_utils::findDeviceBuffer(_trainingParams.prevRunningMean().uid(), - deviceBuffers, - numDeviceBuffers) - .ptr; - prevRunningVariancePtr - = miopen_utils::findDeviceBuffer( - _trainingParams.prevRunningVariance().uid(), deviceBuffers, numDeviceBuffers) - .ptr; - nextRunningMeanPtr = miopen_utils::findDeviceBuffer(_trainingParams.nextRunningMean().uid(), - deviceBuffers, - numDeviceBuffers) - .ptr; - nextRunningVariancePtr - = miopen_utils::findDeviceBuffer( - _trainingParams.nextRunningVariance().uid(), deviceBuffers, numDeviceBuffers) - .ptr; - } + const bool hasRunningStats = _trainingParams.hasRunningStats(); + void* const prevRunningMeanPtr + = hasRunningStats ? miopen_utils::findDeviceBuffer(_trainingParams.prevRunningMean().uid(), + deviceBuffers, + numDeviceBuffers) + .ptr + : nullptr; + void* const prevRunningVariancePtr + = hasRunningStats + ? miopen_utils::findDeviceBuffer( + _trainingParams.prevRunningVariance().uid(), deviceBuffers, numDeviceBuffers) + .ptr + : nullptr; + void* const nextRunningMeanPtr + = hasRunningStats ? miopen_utils::findDeviceBuffer(_trainingParams.nextRunningMean().uid(), + deviceBuffers, + numDeviceBuffers) + .ptr + : nullptr; + void* const nextRunningVariancePtr + = hasRunningStats + ? miopen_utils::findDeviceBuffer( + _trainingParams.nextRunningVariance().uid(), deviceBuffers, numDeviceBuffers) + .ptr + : nullptr; // Check if activation fusion is enabled const auto& optActivation = _trainingParams.optActivation(); diff --git a/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp index e662dedbc5a..51478d8bd55 100644 --- a/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp @@ -411,7 +411,7 @@ std::string EngineDescriptor::toString() const str += _engineIdSet ? std::to_string(_engineId) : "unset"; str += _graph ? ", graph=" + fmt::format("{:p}", static_cast(_graph.get())) : ", graph=null"; - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp index accd99aa3b1..d094b5a6c71 100644 --- a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp +++ b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp @@ -96,7 +96,7 @@ inline void* getSymbol(SharedLibraryHandle handle, const char* symbolName) inline std::filesystem::path getLoadedLibraryDirectoryForSymbol(const char* symbolName) { auto _ = dlerror(); - void* const symbol = dlsym(RTLD_DEFAULT, symbolName); + const void* const symbol = dlsym(RTLD_DEFAULT, symbolName); const char* error = dlerror(); if(error != nullptr) { From 8aaa3c732cc783643f7d7b25e367a788dbd566fb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 14:35:40 -0400 Subject: [PATCH 69/97] fix: replace single-char string literals with char literals in descriptor codegen clang-tidy performance-faster-string-find treats `str += "}"` and `str += "]"` as warnings (treated as errors in CI). Replace with `str += '}';` and `str += ']';` in all generated descriptor `.cpp` files and the `descriptor.cpp.j2` template that produces them. --- .../descriptors/BatchnormBackwardOperationDescriptor.cpp | 2 +- .../descriptors/BatchnormInferenceOperationDescriptor.cpp | 2 +- .../BatchnormInferenceVarianceExtOperationDescriptor.cpp | 2 +- .../src/descriptors/BatchnormOperationDescriptor.cpp | 2 +- .../descriptors/BlockScaleDequantizeOperationDescriptor.cpp | 2 +- .../descriptors/BlockScaleQuantizeOperationDescriptor.cpp | 2 +- .../src/descriptors/ConvolutionBwdOperationDescriptor.cpp | 2 +- .../src/descriptors/ConvolutionFwdOperationDescriptor.cpp | 2 +- .../src/descriptors/ConvolutionWrwOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/CustomOpOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/EngineHeuristicDescriptor.cpp | 4 ++-- .../backend/src/descriptors/ExecutionPlanDescriptor.cpp | 2 +- projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp | 6 +++--- .../backend/src/descriptors/KnobSettingDescriptor.cpp | 2 +- .../src/descriptors/LayernormOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/MatmulOperationDescriptor.cpp | 2 +- .../src/descriptors/PointwiseOperationDescriptor.cpp | 2 +- .../src/descriptors/RMSNormBackwardOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/RMSNormOperationDescriptor.cpp | 2 +- .../src/descriptors/ReductionOperationDescriptor.cpp | 2 +- .../src/descriptors/ResampleFwdOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/SdpaBwdOperationDescriptor.cpp | 2 +- .../backend/src/descriptors/SdpaFwdOperationDescriptor.cpp | 2 +- .../hipdnn/backend/src/descriptors/TensorDescriptor.cpp | 2 +- .../hipdnn/backend/src/descriptors/VariantDescriptor.cpp | 2 +- .../tools/DescriptorGenerator/templates/descriptor.cpp.j2 | 2 +- 26 files changed, 29 insertions(+), 29 deletions(-) diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp index e634dff8d24..0d1b2edb697 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp @@ -341,7 +341,7 @@ std::string BatchnormBackwardOperationDescriptor::toString() const str += ", peer_stats_uids=" + vecToString(_data.peer_stats_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp index b895ed659b9..2f7768a433e 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp @@ -260,7 +260,7 @@ std::string BatchnormInferenceOperationDescriptor::toString() const str += ", y_uid=" + std::to_string(_data.y_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp index 1941f0c0ce2..34af06a703a 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp @@ -290,7 +290,7 @@ std::string BatchnormInferenceVarianceExtOperationDescriptor::toString() const str += ", epsilon_uid=" + std::to_string(_data.epsilon_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp index 225d601fb71..ef4756908fd 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp @@ -450,7 +450,7 @@ std::string BatchnormOperationDescriptor::toString() const str += ", peer_stats_uids=" + vecToString(_data.peer_stats_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp index 658f5ceb8b0..71b59d09a0d 100644 --- a/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp @@ -245,7 +245,7 @@ std::string BlockScaleDequantizeOperationDescriptor::toString() const str += ", is_negative_scale=" + std::to_string(static_cast(_data.is_negative_scale)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp index f3118cff1be..c97f2ea13da 100644 --- a/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp @@ -257,7 +257,7 @@ std::string BlockScaleQuantizeOperationDescriptor::toString() const str += std::string(", transpose=") + (_data.transpose ? "true" : "false"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp index e48b6c16fec..4aa2922557b 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp @@ -175,7 +175,7 @@ std::string ConvolutionBwdOperationDescriptor::toString() const str += hipdnn_flatbuffers_sdk::data_objects::EnumNameConvMode(_data.conv_mode); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp index c2e9cfcae55..3b3e88d5ae4 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp @@ -197,7 +197,7 @@ std::string ConvolutionFwdOperationDescriptor::toString() const str += ", conv_mode=" + std::to_string(static_cast(_data.conv_mode)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp index 4604a6239b5..1290218a0a8 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp @@ -175,7 +175,7 @@ std::string ConvolutionWrwOperationDescriptor::toString() const str += hipdnn_flatbuffers_sdk::data_objects::EnumNameConvMode(_data.conv_mode); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp index 23038500f6f..f3a426b4970 100644 --- a/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp @@ -243,7 +243,7 @@ std::string CustomOpOperationDescriptor::toString() const str += ", data_size=" + std::to_string(_data.data.size()); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp index 63bcd78bd77..bea9114925d 100644 --- a/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp @@ -773,9 +773,9 @@ std::string EngineHeuristicDescriptor::toString() const } str += hipdnn_data_sdk::utilities::formatEngineIdHex(_policyOrder[i]); } - str += "]"; + str += ']'; } - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp index 107710a14b6..b39eb32d7d4 100644 --- a/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp @@ -469,7 +469,7 @@ std::string ExecutionPlanDescriptor::toString() const str += _engineConfig ? ", engineConfig=" + fmt::format("{:p}", static_cast(_engineConfig.get())) : ", engineConfig=null"; - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp index 069f9791a78..a547c3282d2 100644 --- a/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp @@ -846,7 +846,7 @@ std::string KnobDescriptor::toString() const { str += "max=" + std::to_string(*_maxValueDouble); } - str += "}"; + str += '}'; } if(!_validValuesString.empty() || _stringMaxLength.has_value()) { @@ -855,10 +855,10 @@ std::string KnobDescriptor::toString() const { str += " maxLen=" + std::to_string(*_stringMaxLength); } - str += "}"; + str += '}'; } - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp index 5f45a0f7056..8841ea9d68d 100644 --- a/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp @@ -135,7 +135,7 @@ std::string KnobSettingDescriptor::toString() const { std::string str = "KnobSettingDescriptor: {knobId=" + _knobId; str += ", valueType=" + std::to_string(static_cast(_value.type)); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp index 9c14a05f6ad..12004144af2 100644 --- a/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp @@ -373,7 +373,7 @@ std::string LayernormOperationDescriptor::toString() const str += ", forward_phase=" + std::to_string(static_cast(_data.forward_phase)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp index 1a445f2936d..626ac6cfab1 100644 --- a/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp @@ -193,7 +193,7 @@ std::string MatmulOperationDescriptor::toString() const str += ", c_uid=" + std::to_string(_data.c_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp index c942ba0e69c..4430317f36d 100644 --- a/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp @@ -369,7 +369,7 @@ std::string PointwiseOperationDescriptor::toString() const + (_data.softplus_beta ? std::to_string(*_data.softplus_beta) : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp index fcedc6e08b9..c02b63ca0c4 100644 --- a/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp @@ -283,7 +283,7 @@ std::string RMSNormBackwardOperationDescriptor::toString() const + (_data.dbias_tensor_uid ? std::to_string(*_data.dbias_tensor_uid) : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp index a39a56b1bbb..2902945054e 100644 --- a/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp @@ -288,7 +288,7 @@ std::string RMSNormOperationDescriptor::toString() const str += ", forward_phase=" + std::to_string(static_cast(_data.forward_phase)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp index aa4741405f4..8e0812dc96c 100644 --- a/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp @@ -213,7 +213,7 @@ std::string ReductionOperationDescriptor::toString() const str += (_data.is_deterministic ? "true" : "false"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp index 3da52bf645f..6af025ef310 100644 --- a/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp @@ -341,7 +341,7 @@ std::string ResampleFwdOperationDescriptor::toString() const : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp index 702338edd5f..438f02a08bc 100644 --- a/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp @@ -740,7 +740,7 @@ std::string SdpaBwdOperationDescriptor::toString() const str += ", diagonal_alignment=" + std::to_string(static_cast(_data.diagonal_alignment)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp index 52cf0a5ff46..e79fc3ce83e 100644 --- a/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp @@ -873,7 +873,7 @@ std::string SdpaFwdOperationDescriptor::toString() const str += ", implementation=" + std::to_string(static_cast(_data.implementation)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp index 30a353369b3..fc2608b293a 100644 --- a/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp @@ -515,7 +515,7 @@ std::string TensorDescriptor::toString() const break; } } - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp index 382aaec975f..75aaae07e46 100644 --- a/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp @@ -367,7 +367,7 @@ std::string VariantDescriptor::toString() const str += ", overrideShapes=" + std::to_string(_overrideShapes.size()); str += ", overrideStrides=" + std::to_string(_overrideStrides.size()); } - str += "}"; + str += '}'; return str; } diff --git a/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 b/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 index 3be26a2e9ae..c1ab7732cd4 100644 --- a/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 +++ b/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 @@ -452,7 +452,7 @@ std::string {{ op.class_name }}::toString() const str += ", compute_data_type="; str += {{ op.fbs_namespace }}::EnumNameDataType(_computeDataType); {% endif %} - str += "}"; + str += '}'; return str; } From a65b54674436fd44967e259b54b60e1b3c27c331 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 14:46:13 -0400 Subject: [PATCH 70/97] ci: disable clang-tidy in hipdnn-python-tests workflow The python-tests job builds the full repo (hipdnn + miopen-provider) with ENABLE_CLANG_TIDY defaulting to ON on Linux, so clang-tidy ran across miopen-provider sources. The dedicated `clang-tidy (hipdnn)` job intentionally scopes to `projects/hipdnn` only, and miopen-provider clang-tidy is disabled upstream (issue #5067). Disable clang-tidy here so the python-tests job verifies the Python bindings build/test rather than re-running C++ linting that is already gated (or intentionally not gated) elsewhere. --- .github/workflows/hipdnn-python-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 66493ac7e96..0e2331330a6 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -60,6 +60,7 @@ jobs: -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ -DENABLE_CLANG_FORMAT=OFF \ + -DENABLE_CLANG_TIDY=OFF \ -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd From 3d88022eaf03f08e4d71d699afd5c5631f1ad231 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:05:36 -0400 Subject: [PATCH 71/97] ci: move hipdnn-python-tests to GPU runner Tests require a ROCm-capable GPU; azure-linux-scale-rocm has none, causing pytest to fail with 'no ROCm-capable device is detected'. Switch to linux-gfx942-1gpu-ossci-rocm (canonical 1-GPU gfx94X label per TheRock amdgpu_family_matrix.py). --- .github/workflows/hipdnn-python-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 0e2331330a6..f85ec9c694d 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -20,7 +20,7 @@ concurrency: jobs: python-tests: - runs-on: azure-linux-scale-rocm + runs-on: linux-gfx942-1gpu-ossci-rocm steps: - name: Checkout rocm-libraries uses: actions/checkout@v6 From e4e852c29c7d7f1309ef910b9b167e8ad7c034e7 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:20:25 -0400 Subject: [PATCH 72/97] ci: revert hipdnn-python-tests runner and clang-tidy toggle Switch runs-on back to azure-linux-scale-rocm and re-enable clang-tidy (remove -DENABLE_CLANG_TIDY=OFF) per user direction. --- .github/workflows/hipdnn-python-tests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index f85ec9c694d..66493ac7e96 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -20,7 +20,7 @@ concurrency: jobs: python-tests: - runs-on: linux-gfx942-1gpu-ossci-rocm + runs-on: azure-linux-scale-rocm steps: - name: Checkout rocm-libraries uses: actions/checkout@v6 @@ -60,7 +60,6 @@ jobs: -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ -DENABLE_CLANG_FORMAT=OFF \ - -DENABLE_CLANG_TIDY=OFF \ -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd From 0b7825d27a062aec42e3c067082a66ae37eb279e Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:22:30 -0400 Subject: [PATCH 73/97] ci: drop GPU-dependent pytest step from hipdnn-python-tests azure-linux-scale-rocm has no GPU; pytest fails with 'no ROCm-capable device is detected'. Keep build-only verification until a GPU runner strategy is sorted out. --- .github/workflows/hipdnn-python-tests.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 66493ac7e96..49bc65d1772 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -64,11 +64,3 @@ jobs: -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd ninja -C build hipdnn_frontend_python libmiopen_plugin.so - - - name: Run Python binding tests - run: | - . .venv/bin/activate - PYTHONPATH=projects/hipdnn/python:build/lib \ - LD_LIBRARY_PATH=build/lib:/opt/rocm/lib \ - HIPDNN_PLUGIN_PATH=build/lib/hipdnn_plugins/engines \ - pytest projects/hipdnn/python/hipdnn_frontend/test/ -v From 02d982a389bd437a797b971d82699302b9ac185a Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:25:25 -0400 Subject: [PATCH 74/97] ci: run CPU-only Python binding tests, skip @pytest.mark.gpu Re-add pytest step but pass -m 'not gpu' so it runs only the non-GPU tests (tensor/graph API, etc.) on the CPU-only azure-linux-scale-rocm runner. Register the 'gpu' marker in pyproject.toml so pytest doesn't warn on the filter expression. --- .github/workflows/hipdnn-python-tests.yml | 8 ++++++++ projects/hipdnn/python/pyproject.toml | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 49bc65d1772..6e817a118a2 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -64,3 +64,11 @@ jobs: -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd ninja -C build hipdnn_frontend_python libmiopen_plugin.so + + - name: Run Python binding tests (CPU-only) + run: | + . .venv/bin/activate + PYTHONPATH=projects/hipdnn/python:build/lib \ + LD_LIBRARY_PATH=build/lib:/opt/rocm/lib \ + HIPDNN_PLUGIN_PATH=build/lib/hipdnn_plugins/engines \ + pytest projects/hipdnn/python/hipdnn_frontend/test/ -v -m "not gpu" diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index dec4eef64b0..e73a28e8340 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -40,3 +40,8 @@ dev = [ [tool.scikit-build] cmake.build-type = "Release" wheel.packages = [] + +[tool.pytest.ini_options] +markers = [ + "gpu: test requires a ROCm-capable GPU", +] From 6f50127e4ed1b24c64e90d473053d8ff3df00ea0 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:34:32 -0400 Subject: [PATCH 75/97] test: register 'integration' pytest marker test_conv_{dgrad,fprop,wgrad}.py use @pytest.mark.integration; register it in pyproject.toml so pytest doesn't emit PytestUnknownMarkWarning. --- projects/hipdnn/python/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/hipdnn/python/pyproject.toml b/projects/hipdnn/python/pyproject.toml index e73a28e8340..281bb798ecb 100644 --- a/projects/hipdnn/python/pyproject.toml +++ b/projects/hipdnn/python/pyproject.toml @@ -44,4 +44,5 @@ wheel.packages = [] [tool.pytest.ini_options] markers = [ "gpu: test requires a ROCm-capable GPU", + "integration: end-to-end integration test (slower)", ] From 7099c88f91db181198c3c9a4e1ba20aad2b24bb6 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Tue, 26 May 2026 15:46:43 -0400 Subject: [PATCH 76/97] ci: disable clang-tidy in python-tests workflow Dedicated 'clang-tidy (hipdnn)' job covers clang-tidy checks. Running it again in python-tests with the CI runner's older clang-tidy version produces false positives that the version-pinned dedicated job does not flag. --- .github/workflows/hipdnn-python-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml index 6e817a118a2..6b8d0ca96ee 100644 --- a/.github/workflows/hipdnn-python-tests.yml +++ b/.github/workflows/hipdnn-python-tests.yml @@ -60,6 +60,7 @@ jobs: -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ -DENABLE_CLANG_FORMAT=OFF \ + -DENABLE_CLANG_TIDY=OFF \ -DROCM_PATH=/opt/rocm \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DHIP_PLATFORM=amd From a4f6723f2d42efaa64ae10519aea42ddbaa10c56 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 13:59:57 -0400 Subject: [PATCH 77/97] Revert cpp/hpp changes and remove python-tests workflow Reverts all .cpp and .hpp diffs vs origin/develop and deletes the hipdnn-python-tests GitHub Actions workflow. Branch changes preserved in WIP patch file for later reapply. Co-Authored-By: Claude Opus 4.7 --- .github/workflows/hipdnn-python-tests.yml | 75 ------------------- .../engines/plans/MiopenBatchnormBwdPlan.cpp | 12 ++- .../plans/MiopenBatchnormFwdTrainingPlan.cpp | 49 ++++++------ .../blas_ex/testing_gemm_batched_ex.hpp | 4 +- .../include/blas_ex/testing_gemm_ex.hpp | 4 +- .../testing_gemm_strided_batched_ex.hpp | 4 +- .../BatchnormBackwardOperationDescriptor.cpp | 2 +- .../BatchnormInferenceOperationDescriptor.cpp | 2 +- ...nferenceVarianceExtOperationDescriptor.cpp | 2 +- .../BatchnormOperationDescriptor.cpp | 2 +- ...lockScaleDequantizeOperationDescriptor.cpp | 2 +- .../BlockScaleQuantizeOperationDescriptor.cpp | 2 +- .../ConvolutionBwdOperationDescriptor.cpp | 2 +- .../ConvolutionFwdOperationDescriptor.cpp | 2 +- .../ConvolutionWrwOperationDescriptor.cpp | 2 +- .../CustomOpOperationDescriptor.cpp | 2 +- .../src/descriptors/EngineDescriptor.cpp | 2 +- .../descriptors/EngineHeuristicDescriptor.cpp | 4 +- .../descriptors/ExecutionPlanDescriptor.cpp | 2 +- .../src/descriptors/GraphDescriptor.cpp | 2 +- .../src/descriptors/KnobDescriptor.cpp | 6 +- .../src/descriptors/KnobSettingDescriptor.cpp | 2 +- .../LayernormOperationDescriptor.cpp | 2 +- .../descriptors/MatmulOperationDescriptor.cpp | 2 +- .../PointwiseOperationDescriptor.cpp | 2 +- .../RMSNormBackwardOperationDescriptor.cpp | 2 +- .../RMSNormOperationDescriptor.cpp | 2 +- .../ReductionOperationDescriptor.cpp | 2 +- .../ResampleFwdOperationDescriptor.cpp | 2 +- .../SdpaBwdOperationDescriptor.cpp | 2 +- .../SdpaFwdOperationDescriptor.cpp | 2 +- .../src/descriptors/TensorDescriptor.cpp | 2 +- .../src/descriptors/VariantDescriptor.cpp | 2 +- .../plugin/EnginePluginResourceManager.cpp | 11 +-- .../utilities/PlatformUtils.linux.hpp | 2 +- .../src/kernels/default_configurations.hpp | 3 +- .../src/kernels/reduction_functions.hpp | 40 +++------- .../include/utility/config.hpp | 6 +- .../batchnorm/backward_per_activation.cpp | 1 + .../backward_per_activation_fused.cpp | 12 ++- .../src/solver/batchnorm/backward_spatial.cpp | 3 + .../solver/batchnorm/forward_inference.cpp | 1 + .../batchnorm/forward_per_activation.cpp | 1 + .../forward_per_activation_fused.cpp | 1 + .../src/solver/batchnorm/forward_spatial.cpp | 9 ++- .../miopen/src/solver/conv/conv_winoRxS.cpp | 6 +- .../src/solver/conv/conv_wino_fury_RxS.cpp | 8 +- .../miopen/src/solver/conv_winoRxS_fused.cpp | 6 +- projects/miopen/src/solver/mha/mha_common.hpp | 1 + 49 files changed, 117 insertions(+), 202 deletions(-) delete mode 100644 .github/workflows/hipdnn-python-tests.yml diff --git a/.github/workflows/hipdnn-python-tests.yml b/.github/workflows/hipdnn-python-tests.yml deleted file mode 100644 index 6b8d0ca96ee..00000000000 --- a/.github/workflows/hipdnn-python-tests.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: hipdnn-python-tests - -on: - push: - branches: - - develop - - release/therock-* - paths: - - 'projects/hipdnn/**' - - '.github/workflows/hipdnn-python-tests.yml' - pull_request: - paths: - - 'projects/hipdnn/**' - - '.github/workflows/hipdnn-python-tests.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.number || github.sha }} - cancel-in-progress: true - -jobs: - python-tests: - runs-on: azure-linux-scale-rocm - steps: - - name: Checkout rocm-libraries - uses: actions/checkout@v6 - - - name: Checkout TheRock - uses: actions/checkout@v6 - with: - repository: ROCm/TheRock - path: TheRock - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y \ - ninja-build \ - python3-venv \ - python3-dev - python3 -m venv .venv - . .venv/bin/activate - pip install boto3 cmake numpy pytest - pip install -r TheRock/requirements.txt - - - name: Install ROCm - run: | - sudo .venv/bin/python3 TheRock/build_tools/install_rocm_from_artifacts.py \ - --latest-release \ - --amdgpu-family gfx94X-dcgpu \ - --output-dir /opt/rocm \ - --base-only - - - name: Build hipDNN with Python bindings and miopen-provider - run: | - . .venv/bin/activate - export PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH - export CXX=/opt/rocm/llvm/bin/clang++ - cmake -B build -GNinja \ - -DROCM_LIBS_ENABLE_COMPONENTS="hipdnn;miopen-provider" \ - -DHIPDNN_BUILD_PYTHON_BINDINGS=ON \ - -DENABLE_CLANG_FORMAT=OFF \ - -DENABLE_CLANG_TIDY=OFF \ - -DROCM_PATH=/opt/rocm \ - -DCMAKE_PREFIX_PATH=/opt/rocm \ - -DHIP_PLATFORM=amd - ninja -C build hipdnn_frontend_python libmiopen_plugin.so - - - name: Run Python binding tests (CPU-only) - run: | - . .venv/bin/activate - PYTHONPATH=projects/hipdnn/python:build/lib \ - LD_LIBRARY_PATH=build/lib:/opt/rocm/lib \ - HIPDNN_PLUGIN_PATH=build/lib/hipdnn_plugins/engines \ - pytest projects/hipdnn/python/hipdnn_frontend/test/ -v -m "not gpu" diff --git a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp index a8d3412299d..35f9bec1491 100644 --- a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp +++ b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormBwdPlan.cpp @@ -197,18 +197,16 @@ void BatchnormBwdPlan::execute(const HipdnnMiopenHandle& handle, } // For non-fused case, scale descriptor and bias descriptor are equivalent - const bool fused = _params.optActivation().has_value() && _params.optBias().has_value(); miopenTensorDescriptor_t biasDescriptor = _params.scale().tensorDescriptor(); + void* biasPtr = nullptr; miopenActivationDescriptor_t activationDescriptor = nullptr; - void* const biasPtr = fused ? miopen_utils::findDeviceBuffer(_params.optBias().value().uid(), - deviceBuffers, - numDeviceBuffers) - .ptr - : nullptr; - if(fused) + if(_params.optActivation().has_value() && _params.optBias().has_value()) { + auto biasBuffer = miopen_utils::findDeviceBuffer( + _params.optBias().value().uid(), deviceBuffers, numDeviceBuffers); biasDescriptor = _params.optBias().value().tensorDescriptor(); + biasPtr = biasBuffer.ptr; activationDescriptor = _params.optActivation().value().activationDescriptor(); } diff --git a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp index 282c67bfeaf..5dbf487a96b 100644 --- a/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp +++ b/dnn-providers/miopen-provider/engines/plans/MiopenBatchnormFwdTrainingPlan.cpp @@ -310,31 +310,30 @@ void BatchnormFwdTrainingPlan::execute(const HipdnnMiopenHandle& handle, savedVarDesc = _trainingParams.invVariance().tensorDescriptor(); } - const bool hasRunningStats = _trainingParams.hasRunningStats(); - void* const prevRunningMeanPtr - = hasRunningStats ? miopen_utils::findDeviceBuffer(_trainingParams.prevRunningMean().uid(), - deviceBuffers, - numDeviceBuffers) - .ptr - : nullptr; - void* const prevRunningVariancePtr - = hasRunningStats - ? miopen_utils::findDeviceBuffer( - _trainingParams.prevRunningVariance().uid(), deviceBuffers, numDeviceBuffers) - .ptr - : nullptr; - void* const nextRunningMeanPtr - = hasRunningStats ? miopen_utils::findDeviceBuffer(_trainingParams.nextRunningMean().uid(), - deviceBuffers, - numDeviceBuffers) - .ptr - : nullptr; - void* const nextRunningVariancePtr - = hasRunningStats - ? miopen_utils::findDeviceBuffer( - _trainingParams.nextRunningVariance().uid(), deviceBuffers, numDeviceBuffers) - .ptr - : nullptr; + void* prevRunningMeanPtr = nullptr; + void* prevRunningVariancePtr = nullptr; + void* nextRunningMeanPtr = nullptr; + void* nextRunningVariancePtr = nullptr; + + if(_trainingParams.hasRunningStats()) + { + prevRunningMeanPtr = miopen_utils::findDeviceBuffer(_trainingParams.prevRunningMean().uid(), + deviceBuffers, + numDeviceBuffers) + .ptr; + prevRunningVariancePtr + = miopen_utils::findDeviceBuffer( + _trainingParams.prevRunningVariance().uid(), deviceBuffers, numDeviceBuffers) + .ptr; + nextRunningMeanPtr = miopen_utils::findDeviceBuffer(_trainingParams.nextRunningMean().uid(), + deviceBuffers, + numDeviceBuffers) + .ptr; + nextRunningVariancePtr + = miopen_utils::findDeviceBuffer( + _trainingParams.nextRunningVariance().uid(), deviceBuffers, numDeviceBuffers) + .ptr; + } // Check if activation fusion is enabled const auto& optActivation = _trainingParams.optActivation(); diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp index 558ecf7b72e..c624fd3a767 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp @@ -272,8 +272,8 @@ void testing_gemm_batched_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, alpha, A, and B can be nullptr - DAPI_CHECK(hipblasGemmBatchedExFn, (handle, transA, transB, M, N, 0, nullptr, + // If K == 0, A, and B can be nullptr + DAPI_CHECK(hipblasGemmBatchedExFn, (handle, transA, transB, M, N, 0, alpha, nullptr, aType, lda, nullptr, bType, ldb, beta, (void**)dC.ptr_on_device(), cType, ldc, batch_count, diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp index 62f33d4db53..7d693d8a980 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp @@ -269,8 +269,8 @@ void testing_gemm_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, alpha, A, and B can be nullptr - DAPI_CHECK(hipblasGemmExFn, (handle, transA, transB, M, N, 0, nullptr, + // If K == 0, A, and B can be nullptr + DAPI_CHECK(hipblasGemmExFn, (handle, transA, transB, M, N, 0, alpha, nullptr, aType, lda, nullptr, bType, ldb, beta, dC, cType, ldc, diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp index e6a03b85c40..8aa4993433b 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp @@ -285,8 +285,8 @@ void testing_gemm_strided_batched_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, alpha, A, and B can be nullptr - DAPI_CHECK(hipblasGemmStridedBatchedExFn, (handle, transA, transB, M, N, 0, nullptr, + // If K == 0, A, and B can be nullptr + DAPI_CHECK(hipblasGemmStridedBatchedExFn, (handle, transA, transB, M, N, 0, alpha, nullptr, aType, lda, stride_A, nullptr, bType, ldb, stride_B, beta, dC, cType, ldc, stride_C, batch_count, diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp index 0d1b2edb697..e634dff8d24 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormBackwardOperationDescriptor.cpp @@ -341,7 +341,7 @@ std::string BatchnormBackwardOperationDescriptor::toString() const str += ", peer_stats_uids=" + vecToString(_data.peer_stats_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp index 2f7768a433e..b895ed659b9 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceOperationDescriptor.cpp @@ -260,7 +260,7 @@ std::string BatchnormInferenceOperationDescriptor::toString() const str += ", y_uid=" + std::to_string(_data.y_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp index 34af06a703a..1941f0c0ce2 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormInferenceVarianceExtOperationDescriptor.cpp @@ -290,7 +290,7 @@ std::string BatchnormInferenceVarianceExtOperationDescriptor::toString() const str += ", epsilon_uid=" + std::to_string(_data.epsilon_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp index ef4756908fd..225d601fb71 100644 --- a/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BatchnormOperationDescriptor.cpp @@ -450,7 +450,7 @@ std::string BatchnormOperationDescriptor::toString() const str += ", peer_stats_uids=" + vecToString(_data.peer_stats_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp index 71b59d09a0d..658f5ceb8b0 100644 --- a/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BlockScaleDequantizeOperationDescriptor.cpp @@ -245,7 +245,7 @@ std::string BlockScaleDequantizeOperationDescriptor::toString() const str += ", is_negative_scale=" + std::to_string(static_cast(_data.is_negative_scale)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp index c97f2ea13da..f3118cff1be 100644 --- a/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/BlockScaleQuantizeOperationDescriptor.cpp @@ -257,7 +257,7 @@ std::string BlockScaleQuantizeOperationDescriptor::toString() const str += std::string(", transpose=") + (_data.transpose ? "true" : "false"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp index 4aa2922557b..e48b6c16fec 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionBwdOperationDescriptor.cpp @@ -175,7 +175,7 @@ std::string ConvolutionBwdOperationDescriptor::toString() const str += hipdnn_flatbuffers_sdk::data_objects::EnumNameConvMode(_data.conv_mode); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp index 3b3e88d5ae4..c2e9cfcae55 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionFwdOperationDescriptor.cpp @@ -197,7 +197,7 @@ std::string ConvolutionFwdOperationDescriptor::toString() const str += ", conv_mode=" + std::to_string(static_cast(_data.conv_mode)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp index 1290218a0a8..4604a6239b5 100644 --- a/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ConvolutionWrwOperationDescriptor.cpp @@ -175,7 +175,7 @@ std::string ConvolutionWrwOperationDescriptor::toString() const str += hipdnn_flatbuffers_sdk::data_objects::EnumNameConvMode(_data.conv_mode); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp index f3a426b4970..23038500f6f 100644 --- a/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/CustomOpOperationDescriptor.cpp @@ -243,7 +243,7 @@ std::string CustomOpOperationDescriptor::toString() const str += ", data_size=" + std::to_string(_data.data.size()); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp index 51478d8bd55..e662dedbc5a 100644 --- a/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/EngineDescriptor.cpp @@ -411,7 +411,7 @@ std::string EngineDescriptor::toString() const str += _engineIdSet ? std::to_string(_engineId) : "unset"; str += _graph ? ", graph=" + fmt::format("{:p}", static_cast(_graph.get())) : ", graph=null"; - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp index bea9114925d..63bcd78bd77 100644 --- a/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/EngineHeuristicDescriptor.cpp @@ -773,9 +773,9 @@ std::string EngineHeuristicDescriptor::toString() const } str += hipdnn_data_sdk::utilities::formatEngineIdHex(_policyOrder[i]); } - str += ']'; + str += "]"; } - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp index b39eb32d7d4..107710a14b6 100644 --- a/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ExecutionPlanDescriptor.cpp @@ -469,7 +469,7 @@ std::string ExecutionPlanDescriptor::toString() const str += _engineConfig ? ", engineConfig=" + fmt::format("{:p}", static_cast(_engineConfig.get())) : ", engineConfig=null"; - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp index bcd0d9bf64b..9003cce2868 100644 --- a/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/GraphDescriptor.cpp @@ -505,7 +505,7 @@ std::string GraphDescriptor::toString() const str += _handle != nullptr ? fmt::format("{:p}", static_cast(_handle)) : "null"; str += ", name=" + (_name.empty() ? std::string("(empty)") : _name); str += ", serializedGraphSize=" + std::to_string(_graphSerializedBuffer.size()); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp index a547c3282d2..069f9791a78 100644 --- a/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/KnobDescriptor.cpp @@ -846,7 +846,7 @@ std::string KnobDescriptor::toString() const { str += "max=" + std::to_string(*_maxValueDouble); } - str += '}'; + str += "}"; } if(!_validValuesString.empty() || _stringMaxLength.has_value()) { @@ -855,10 +855,10 @@ std::string KnobDescriptor::toString() const { str += " maxLen=" + std::to_string(*_stringMaxLength); } - str += '}'; + str += "}"; } - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp index 8841ea9d68d..5f45a0f7056 100644 --- a/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/KnobSettingDescriptor.cpp @@ -135,7 +135,7 @@ std::string KnobSettingDescriptor::toString() const { std::string str = "KnobSettingDescriptor: {knobId=" + _knobId; str += ", valueType=" + std::to_string(static_cast(_value.type)); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp index 12004144af2..9c14a05f6ad 100644 --- a/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/LayernormOperationDescriptor.cpp @@ -373,7 +373,7 @@ std::string LayernormOperationDescriptor::toString() const str += ", forward_phase=" + std::to_string(static_cast(_data.forward_phase)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp index 626ac6cfab1..1a445f2936d 100644 --- a/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/MatmulOperationDescriptor.cpp @@ -193,7 +193,7 @@ std::string MatmulOperationDescriptor::toString() const str += ", c_uid=" + std::to_string(_data.c_tensor_uid); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp index 4430317f36d..c942ba0e69c 100644 --- a/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/PointwiseOperationDescriptor.cpp @@ -369,7 +369,7 @@ std::string PointwiseOperationDescriptor::toString() const + (_data.softplus_beta ? std::to_string(*_data.softplus_beta) : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp index c02b63ca0c4..fcedc6e08b9 100644 --- a/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/RMSNormBackwardOperationDescriptor.cpp @@ -283,7 +283,7 @@ std::string RMSNormBackwardOperationDescriptor::toString() const + (_data.dbias_tensor_uid ? std::to_string(*_data.dbias_tensor_uid) : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp index 2902945054e..a39a56b1bbb 100644 --- a/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/RMSNormOperationDescriptor.cpp @@ -288,7 +288,7 @@ std::string RMSNormOperationDescriptor::toString() const str += ", forward_phase=" + std::to_string(static_cast(_data.forward_phase)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp index 8e0812dc96c..aa4741405f4 100644 --- a/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ReductionOperationDescriptor.cpp @@ -213,7 +213,7 @@ std::string ReductionOperationDescriptor::toString() const str += (_data.is_deterministic ? "true" : "false"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp index 6af025ef310..3da52bf645f 100644 --- a/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/ResampleFwdOperationDescriptor.cpp @@ -341,7 +341,7 @@ std::string ResampleFwdOperationDescriptor::toString() const : "nullopt"); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp index 438f02a08bc..702338edd5f 100644 --- a/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/SdpaBwdOperationDescriptor.cpp @@ -740,7 +740,7 @@ std::string SdpaBwdOperationDescriptor::toString() const str += ", diagonal_alignment=" + std::to_string(static_cast(_data.diagonal_alignment)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp index e79fc3ce83e..52cf0a5ff46 100644 --- a/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/SdpaFwdOperationDescriptor.cpp @@ -873,7 +873,7 @@ std::string SdpaFwdOperationDescriptor::toString() const str += ", implementation=" + std::to_string(static_cast(_data.implementation)); str += ", compute_data_type="; str += hipdnn_flatbuffers_sdk::data_objects::EnumNameDataType(_computeDataType); - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp index fc2608b293a..30a353369b3 100644 --- a/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/TensorDescriptor.cpp @@ -515,7 +515,7 @@ std::string TensorDescriptor::toString() const break; } } - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp b/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp index 75aaae07e46..382aaec975f 100644 --- a/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp +++ b/projects/hipdnn/backend/src/descriptors/VariantDescriptor.cpp @@ -367,7 +367,7 @@ std::string VariantDescriptor::toString() const str += ", overrideShapes=" + std::to_string(_overrideShapes.size()); str += ", overrideStrides=" + std::to_string(_overrideStrides.size()); } - str += '}'; + str += "}"; return str; } diff --git a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp index 652f348a4e6..eaaea25272c 100644 --- a/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp +++ b/projects/hipdnn/backend/src/plugin/EnginePluginResourceManager.cpp @@ -303,16 +303,7 @@ EnginePluginResourceManager::~EnginePluginResourceManager() // Destroy plugin handles for(const auto& [handle, plugin] : _handleToPlugin) { - // Destructors are implicitly noexcept: a log call inside safeDestroyHandle - // (fmt formatting, sink I/O, allocation) can throw. - try - { - safeDestroyHandle(plugin, handle); - } - // NOLINTNEXTLINE(bugprone-empty-catch): re-logging here would recurse into the same throw - catch(...) - { - } + safeDestroyHandle(plugin, handle); } } diff --git a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp index d094b5a6c71..be08fb995e7 100644 --- a/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp +++ b/projects/hipdnn/data_sdk/include/hipdnn_data_sdk/utilities/PlatformUtils.linux.hpp @@ -96,7 +96,7 @@ inline void* getSymbol(SharedLibraryHandle handle, const char* symbolName) inline std::filesystem::path getLoadedLibraryDirectoryForSymbol(const char* symbolName) { auto _ = dlerror(); - const void* const symbol = dlsym(RTLD_DEFAULT, symbolName); + void* symbol = dlsym(RTLD_DEFAULT, symbolName); const char* error = dlerror(); if(error != nullptr) { diff --git a/projects/miopen/src/kernels/default_configurations.hpp b/projects/miopen/src/kernels/default_configurations.hpp index d9d1ccaa865..8b69e85e346 100644 --- a/projects/miopen/src/kernels/default_configurations.hpp +++ b/projects/miopen/src/kernels/default_configurations.hpp @@ -145,7 +145,8 @@ #endif #ifndef MIO_BN_LDSGCN_SIZE -#define MIO_BN_LDSGCN_SIZE 16 +// 4 SIMD with up to 16 wave each => at most 64 waves +#define MIO_BN_LDSGCN_SIZE 64 #endif #ifndef MIO_BN_LDS_SIZE diff --git a/projects/miopen/src/kernels/reduction_functions.hpp b/projects/miopen/src/kernels/reduction_functions.hpp index 8a70eebc633..20a9f76ffdb 100644 --- a/projects/miopen/src/kernels/reduction_functions.hpp +++ b/projects/miopen/src/kernels/reduction_functions.hpp @@ -175,33 +175,11 @@ __forceinline__ __device__ void lds_reduce2_2d(FloatAccumC& x, y = static_cast(lcl_data[xlid * 2 + 1] * scale); } -template -__forceinline__ __device__ void dpp_interleaved_reduction(FloatAccum& temp_sum1, - FloatAccum& temp_sum2) -{ - __asm__ volatile("s_nop 4\n" - "v_add_f32 %0 %0 %0 row_shr:1 bound_ctrl:0\n" - "v_add_f32 %1 %1 %1 row_shr:1 bound_ctrl:0\n" - "s_nop 0\n" - "v_add_f32 %0 %0 %0 row_shr:2 bound_ctrl:0\n" - "v_add_f32 %1 %1 %1 row_shr:2 bound_ctrl:0\n" - "s_nop 0\n" - "v_add_f32 %0 %0 %0 row_shr:4 bank_mask:0xe\n" - "v_add_f32 %1 %1 %1 row_shr:4 bank_mask:0xe\n" - "s_nop 0\n" - "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n" - "v_add_f32 %1 %1 %1 row_shr:8 bank_mask:0xc\n" - "s_nop 0\n" - "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n" - "v_add_f32 %1 %1 %1 row_bcast:15 row_mask:0xa\n" - "s_nop 0\n" - "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n" - "v_add_f32 %1 %1 %1 row_bcast:31 row_mask:0xc\n" - "s_nop 0" - : "=v"(temp_sum1), "=v"(temp_sum2) - : "0"(temp_sum1), "1"(temp_sum2)); -} - +// Caller must ensure: SizeLclData >= (blockDim.x * blockDim.y * blockDim.z + warpSize - 1) / +// warpSize +// @warning Undefined behavior if SizeLclData is too small +// Caller must ensure: All lanes must be active +// @warning Undefined behavior if lanes are masked template __forceinline__ __device__ void gcn_reduce2(FloatAccum& x, FloatAccum& y, @@ -210,10 +188,12 @@ __forceinline__ __device__ void gcn_reduce2(FloatAccum& x, FloatAccum (&lcl_data_y)[SizeLclData], unsigned int lid) { - const unsigned int ldsidx = lid >> 6; - dpp_interleaved_reduction(x, y); + const unsigned int ldsidx = lid / warpSize; + constexpr unsigned long long mask = 0xFFFFFFFFFFFFFFFFull; + x = __reduce_add_sync(mask, x); + y = __reduce_add_sync(mask, y); // Last thread - if((lid % 64) == 63) + if((lid % warpSize) == warpSize - 1) { lcl_data_x[ldsidx] = x; lcl_data_y[ldsidx] = y; diff --git a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp index 0f0cd6572ec..c54d7459b05 100644 --- a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp +++ b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp @@ -22,7 +22,7 @@ defined(CK_AMD_GPU_GFX1036) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || \ defined(CK_AMD_GPU_GFX1151) || defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || \ - defined(CK_AMD_GPU_GFX1200) || defined(CK_AMD_GPU_GFX1201)) + defined(CK_AMD_GPU_GFX1200) || defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250)) #error No CK_AMD_GPU_GFX* macro defined. Exactly one target must be defined. #endif @@ -43,7 +43,7 @@ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || \ defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || defined(CK_AMD_GPU_GFX1151) || \ defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || defined(CK_AMD_GPU_GFX1200) || \ - defined(CK_AMD_GPU_GFX1201) + defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250) #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 #endif @@ -55,7 +55,7 @@ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || \ defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || defined(CK_AMD_GPU_GFX1151) || \ defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || defined(CK_AMD_GPU_GFX1200) || \ - defined(CK_AMD_GPU_GFX1201) + defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250) #define CK_USE_AMD_V_FMAC_F32 #define CK_USE_AMD_V_DOT2_F32_F16 #define CK_USE_AMD_V_DOT4_I32_I8 diff --git a/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp b/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp index b67f5ffed6b..cd4f8e381fb 100644 --- a/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp @@ -121,6 +121,7 @@ BnBwdTrainingPerActivation::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, + {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); diff --git a/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp b/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp index 45921966790..4207b1ba0a1 100644 --- a/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp @@ -135,7 +135,9 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, kernel.g_wk = {xgridsize, ygridsize, zgridsize}; - unsigned int ldsgcn = xlocalsize / 64; + auto const waveSize = handle.GetWavefrontWidth(); + + unsigned int ldsgcn = xlocalsize / waveSize; unsigned int ldsnogcn = xlocalsize; int variant = 0; @@ -156,7 +158,7 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, const auto& activ_op = dynamic_cast(*problem.fusion_plan_desc->op_map[1]); - const auto build_params = KernelBuildParameters{ + auto build_params = KernelBuildParameters{ {"MIO_BN_N", static_cast(n)}, {"MIO_BN_NCHW", static_cast(n * c * h * w)}, {"MIO_BN_NHW", static_cast(n * h * w)}, @@ -172,12 +174,18 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, {"MIO_BN_GFX110X", static_cast(StartsWith(handle.GetDeviceName(), "gfx110"))}, {"MIO_BN_GFX115X", static_cast(StartsWith(handle.GetDeviceName(), "gfx115"))}, {"MIO_BN_GFX120X", static_cast(StartsWith(handle.GetDeviceName(), "gfx120"))}, + {"MIO_BN_GFX125X", static_cast(StartsWith(handle.GetDeviceName(), "gfx125"))}, {"MIOPEN_NRN_OP_ID", static_cast(activ_op.activMode)}, {"MIOPEN_USE_FP16", static_cast(dtype == miopenHalf)}, {"MIOPEN_USE_FP32", static_cast(dtype == miopenFloat)}, {"DATA_TYPE", data_type}}; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + if(mode == miopenBNSpatial) + { + build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); + } + result.construction_params.push_back(kernel); } diff --git a/projects/miopen/src/solver/batchnorm/backward_spatial.cpp b/projects/miopen/src/solver/batchnorm/backward_spatial.cpp index c7368a8d673..a255f007185 100644 --- a/projects/miopen/src/solver/batchnorm/backward_spatial.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_spatial.cpp @@ -340,8 +340,11 @@ ConvSolution BnBwdTrainingSpatial::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, + {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; + build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); + kernel.comp_options = build_params.GenerateFor(kbp::HIP()); kernel.l_wk.push_back(xlocalsize); diff --git a/projects/miopen/src/solver/batchnorm/forward_inference.cpp b/projects/miopen/src/solver/batchnorm/forward_inference.cpp index 253a4f0e6d6..619c62fc856 100644 --- a/projects/miopen/src/solver/batchnorm/forward_inference.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_inference.cpp @@ -162,6 +162,7 @@ ConvSolution BnFwdInference::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, + {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, {"MIO_LAYOUT_NHWC", static_cast(problem.IsLayoutNHWC())}, {"MIO_BN_VECTORIZE", static_cast(vectorsize > 1)}, {"MIO_BN_VEC_SIZE", vectorsize}, diff --git a/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp b/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp index a18885df691..74552763d6e 100644 --- a/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp @@ -120,6 +120,7 @@ BnFwdTrainingPerActivation::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, + {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; auto kernel = KernelInfo{}; diff --git a/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp b/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp index 016f5274b06..dfb2c17f33b 100644 --- a/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp @@ -175,6 +175,7 @@ ConvSolution BnFwdTrgActivationFused::GetSolution(const FusionContext& context, {"MIO_BN_GFX110X", static_cast(StartsWith(handle.GetDeviceName(), "gfx110"))}, {"MIO_BN_GFX115X", static_cast(StartsWith(handle.GetDeviceName(), "gfx115"))}, {"MIO_BN_GFX120X", static_cast(StartsWith(handle.GetDeviceName(), "gfx120"))}, + {"MIO_BN_GFX125X", static_cast(StartsWith(handle.GetDeviceName(), "gfx125"))}, {"MIOPEN_YES_ACTIV", static_cast(1)}, {"MIOPEN_NRN_OP_ID", static_cast(activ_op.activMode)}, {"MIOPEN_USE_FP16", static_cast(input_desc.GetType() == miopenHalf)}, diff --git a/projects/miopen/src/solver/batchnorm/forward_spatial.cpp b/projects/miopen/src/solver/batchnorm/forward_spatial.cpp index 3a36c32b80d..e3789fd98d0 100644 --- a/projects/miopen/src/solver/batchnorm/forward_spatial.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_spatial.cpp @@ -227,6 +227,8 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, int stash_method = 0; size_t nelements = 1; + auto const waveSize = handle.GetWavefrontWidth(); + GetVariantFromKernelId( config.kernel_id, variant, vectorsize, xlocalsize, ylocalsize, zlocalsize, nelements); @@ -240,7 +242,7 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, xlocalsize = 256; } xgridsize = c * xlocalsize; - ldsgcn = xlocalsize / 64; + ldsgcn = xlocalsize / waveSize; ldsnogcn = xlocalsize; } else @@ -282,7 +284,7 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, (xlocalsize * ylocalsize * zlocalsize) / xlocalsize_final / zlocalsize_final; } ldsnogcn = xlocalsize * ylocalsize * zlocalsize; - ldsgcn = xlocalsize * ylocalsize * zlocalsize / 64; + ldsgcn = xlocalsize * ylocalsize * zlocalsize / waveSize; } auto result = ConvSolution{miopenStatusSuccess}; @@ -316,6 +318,7 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, + {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, {"MIO_LAYOUT_NHWC", static_cast(problem.IsLayoutNHWC())}, {"MIO_BN_VECTORIZE", static_cast(vectorsize > 1)}, {"MIO_BN_VEC_SIZE", vectorsize}, @@ -328,6 +331,8 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, build_params.Define("MIO_BN_CHW", in_nstride); build_params.Define("MIO_BN_NCHW", in_nchw); + build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); + kernel.kernel_file = "MIOpenBatchNormFwdTrainSpatial.cpp"; std::string kernel_name = "MIOpenBatchNormFwdTrainSpatial"; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); diff --git a/projects/miopen/src/solver/conv/conv_winoRxS.cpp b/projects/miopen/src/solver/conv/conv_winoRxS.cpp index 6bd58c4bd59..8b51f4258a4 100644 --- a/projects/miopen/src/solver/conv/conv_winoRxS.cpp +++ b/projects/miopen/src/solver/conv/conv_winoRxS.cpp @@ -694,12 +694,12 @@ static bool IsApplicableBase(const ExecutionContext& ctx, const ProblemDescripti const auto name = ctx.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10") || StartsWith(name, "gfx11") || - StartsWith(name, "gfx12"))) + StartsWith(name, "gfx120"))) return false; if(problem.IsFp16() && !(name == "gfx906" || name == "gfx908" || name == "gfx90a" || name == "gfx942" || StartsWith(name, "gfx95") || name == "gfx1011" || name == "gfx1012" || - StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx12"))) + StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx120"))) return false; if(name == "gfx90a" && problem.IsGfx90aFp16altRequired()) @@ -868,7 +868,7 @@ ConvSolution ConvBinWinoRxS::GetSolution( const auto is_gfx9 = StartsWith(name, "gfx9"); const auto is_gfx10 = StartsWith(name, "gfx10"); const auto is_gfx11 = StartsWith(name, "gfx11"); - const auto is_gfx12 = StartsWith(name, "gfx12"); + const auto is_gfx12 = StartsWith(name, "gfx120"); const auto is_v21 = IsWinogradV21Preferred(name, problem); size_t wg_size = is_gfx9 ? 512 : 256; diff --git a/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp b/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp index a3ff18c84ee..edb3fb31459 100644 --- a/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp +++ b/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp @@ -326,7 +326,7 @@ class ShaderModelFactory { return std::make_unique(args, cu_count, n_groups, reduced_vgpr_mem); } - else if(StartsWith(dev_name, "gfx12")) + else if(StartsWith(dev_name, "gfx120")) { return std::make_unique(args, cu_count, n_groups, reduced_vgpr_mem); } @@ -389,8 +389,8 @@ bool ConvWinoFuryRxSCommon::IsApplicable(const ExecutionCo return false; const auto dev_name = ctx.GetStream().GetDeviceName(); - // All gfx11/gfx12 ASICs are supported - if(!(StartsWith(dev_name, "gfx11") || StartsWith(dev_name, "gfx12"))) + // All gfx11/gfx120x ASICs are supported + if(!(StartsWith(dev_name, "gfx11") || StartsWith(dev_name, "gfx120"))) return false; #if WORKAROUND_ISSUE_3044 if(dev_name == "gfx1103") @@ -520,7 +520,7 @@ ConvWinoFuryRxSCommon::GetSolution(const ExecutionContext& std::string kernel_arch = "_gfx11"; const bool is_gfx11 = StartsWith(dev_name, "gfx11"); - const bool is_gfx12 = StartsWith(dev_name, "gfx12"); + const bool is_gfx12 = StartsWith(dev_name, "gfx120"); if(!is_gfx11 && !is_gfx12) MIOPEN_THROW(miopenStatusInternalError); diff --git a/projects/miopen/src/solver/conv_winoRxS_fused.cpp b/projects/miopen/src/solver/conv_winoRxS_fused.cpp index 51f0e906e29..3bf170327e1 100644 --- a/projects/miopen/src/solver/conv_winoRxS_fused.cpp +++ b/projects/miopen/src/solver/conv_winoRxS_fused.cpp @@ -155,13 +155,13 @@ bool ConvBinWinogradRxSf2x3g1Fused::IsApplicable(const FusionContext& context, const std::string name = conv_ctx.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10") || StartsWith(name, "gfx11") || - StartsWith(name, "gfx12"))) + StartsWith(name, "gfx120"))) return false; if(conv_problem.IsFp16() && !(StartsWith(name, "gfx906") || StartsWith(name, "gfx908") || StartsWith(name, "gfx90a") || StartsWith(name, "gfx942") || StartsWith(name, "gfx1011") || StartsWith(name, "gfx1012") || - StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx12"))) + StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx120"))) return false; // clang-format off @@ -208,7 +208,7 @@ ConvSolution ConvBinWinogradRxSf2x3g1Fused::GetSolution(const FusionContext& con const auto is_gfx9 = StartsWith(name, "gfx9"); const auto is_gfx10 = StartsWith(name, "gfx10"); const auto is_gfx11 = StartsWith(name, "gfx11"); - const auto is_gfx12 = StartsWith(name, "gfx12"); + const auto is_gfx12 = StartsWith(name, "gfx120"); const auto is_v21 = IsWinogradV21Preferred<2, 3>(name, conv_problem); size_t wg_size = is_gfx9 ? 512 : 256; kernel.g_wk.push_back(wg_size * n_groups); diff --git a/projects/miopen/src/solver/mha/mha_common.hpp b/projects/miopen/src/solver/mha/mha_common.hpp index d44a672890c..7ca1f1341b3 100644 --- a/projects/miopen/src/solver/mha/mha_common.hpp +++ b/projects/miopen/src/solver/mha/mha_common.hpp @@ -40,6 +40,7 @@ #include #if MIOPEN_ROCBLAS_VERSION_FLAT < 2045000 #include +#define USE_ROCBLAS_EX3 0 #else #include /// rocblas_gemm_ex3 supports F8 datatypes. From 941c1e21e9c66f41614922b8895999cce70a8bbb Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 15:06:50 -0400 Subject: [PATCH 78/97] ci: add hipdnn_python_bindings to therock test matrix Co-Authored-By: Claude Opus 4.7 --- .github/scripts/therock_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/therock_matrix.py b/.github/scripts/therock_matrix.py index 013ae6ba491..20ef896587f 100644 --- a/.github/scripts/therock_matrix.py +++ b/.github/scripts/therock_matrix.py @@ -115,6 +115,7 @@ "projects_to_test": [ "hipdnn", "hipdnn_install", + "hipdnn_python_bindings", "hipdnn-samples", "miopenprovider", "hipblasltprovider", From bdd41e702407681c4ecfa28cbb797ac7c5c70803 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 15:38:30 -0400 Subject: [PATCH 79/97] Revert remaining cpp/hpp changes in PR 7600 Reverts hipblas and miopen cpp/hpp diffs vs PR merge-base. Patch saved in WIP/worktrees/rocm-libraries/python_binding_tests/cpphpp-pr-diffs-2.patch for later reapply. Co-Authored-By: Claude Opus 4.7 --- .../blas_ex/testing_gemm_batched_ex.hpp | 4 +- .../include/blas_ex/testing_gemm_ex.hpp | 4 +- .../testing_gemm_strided_batched_ex.hpp | 4 +- .../src/kernels/default_configurations.hpp | 3 +- .../src/kernels/reduction_functions.hpp | 40 ++++++++++++++----- .../include/utility/config.hpp | 6 +-- .../batchnorm/backward_per_activation.cpp | 1 - .../backward_per_activation_fused.cpp | 12 +----- .../src/solver/batchnorm/backward_spatial.cpp | 3 -- .../solver/batchnorm/forward_inference.cpp | 1 - .../batchnorm/forward_per_activation.cpp | 1 - .../forward_per_activation_fused.cpp | 1 - .../src/solver/batchnorm/forward_spatial.cpp | 9 +---- .../miopen/src/solver/conv/conv_winoRxS.cpp | 6 +-- .../src/solver/conv/conv_wino_fury_RxS.cpp | 8 ++-- .../miopen/src/solver/conv_winoRxS_fused.cpp | 6 +-- projects/miopen/src/solver/mha/mha_common.hpp | 1 - 17 files changed, 54 insertions(+), 56 deletions(-) diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp index c624fd3a767..558ecf7b72e 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_batched_ex.hpp @@ -272,8 +272,8 @@ void testing_gemm_batched_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, A, and B can be nullptr - DAPI_CHECK(hipblasGemmBatchedExFn, (handle, transA, transB, M, N, 0, alpha, + // If K == 0, alpha, A, and B can be nullptr + DAPI_CHECK(hipblasGemmBatchedExFn, (handle, transA, transB, M, N, 0, nullptr, nullptr, aType, lda, nullptr, bType, ldb, beta, (void**)dC.ptr_on_device(), cType, ldc, batch_count, diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp index 7d693d8a980..62f33d4db53 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_ex.hpp @@ -269,8 +269,8 @@ void testing_gemm_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, A, and B can be nullptr - DAPI_CHECK(hipblasGemmExFn, (handle, transA, transB, M, N, 0, alpha, + // If K == 0, alpha, A, and B can be nullptr + DAPI_CHECK(hipblasGemmExFn, (handle, transA, transB, M, N, 0, nullptr, nullptr, aType, lda, nullptr, bType, ldb, beta, dC, cType, ldc, diff --git a/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp b/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp index 8aa4993433b..e6a03b85c40 100644 --- a/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp +++ b/projects/hipblas/clients/include/blas_ex/testing_gemm_strided_batched_ex.hpp @@ -285,8 +285,8 @@ void testing_gemm_strided_batched_ex_bad_arg(const Arguments& arg) computeType, algo, flags)); - // If K == 0, A, and B can be nullptr - DAPI_CHECK(hipblasGemmStridedBatchedExFn, (handle, transA, transB, M, N, 0, alpha, + // If K == 0, alpha, A, and B can be nullptr + DAPI_CHECK(hipblasGemmStridedBatchedExFn, (handle, transA, transB, M, N, 0, nullptr, nullptr, aType, lda, stride_A, nullptr, bType, ldb, stride_B, beta, dC, cType, ldc, stride_C, batch_count, diff --git a/projects/miopen/src/kernels/default_configurations.hpp b/projects/miopen/src/kernels/default_configurations.hpp index 8b69e85e346..d9d1ccaa865 100644 --- a/projects/miopen/src/kernels/default_configurations.hpp +++ b/projects/miopen/src/kernels/default_configurations.hpp @@ -145,8 +145,7 @@ #endif #ifndef MIO_BN_LDSGCN_SIZE -// 4 SIMD with up to 16 wave each => at most 64 waves -#define MIO_BN_LDSGCN_SIZE 64 +#define MIO_BN_LDSGCN_SIZE 16 #endif #ifndef MIO_BN_LDS_SIZE diff --git a/projects/miopen/src/kernels/reduction_functions.hpp b/projects/miopen/src/kernels/reduction_functions.hpp index 20a9f76ffdb..8a70eebc633 100644 --- a/projects/miopen/src/kernels/reduction_functions.hpp +++ b/projects/miopen/src/kernels/reduction_functions.hpp @@ -175,11 +175,33 @@ __forceinline__ __device__ void lds_reduce2_2d(FloatAccumC& x, y = static_cast(lcl_data[xlid * 2 + 1] * scale); } -// Caller must ensure: SizeLclData >= (blockDim.x * blockDim.y * blockDim.z + warpSize - 1) / -// warpSize -// @warning Undefined behavior if SizeLclData is too small -// Caller must ensure: All lanes must be active -// @warning Undefined behavior if lanes are masked +template +__forceinline__ __device__ void dpp_interleaved_reduction(FloatAccum& temp_sum1, + FloatAccum& temp_sum2) +{ + __asm__ volatile("s_nop 4\n" + "v_add_f32 %0 %0 %0 row_shr:1 bound_ctrl:0\n" + "v_add_f32 %1 %1 %1 row_shr:1 bound_ctrl:0\n" + "s_nop 0\n" + "v_add_f32 %0 %0 %0 row_shr:2 bound_ctrl:0\n" + "v_add_f32 %1 %1 %1 row_shr:2 bound_ctrl:0\n" + "s_nop 0\n" + "v_add_f32 %0 %0 %0 row_shr:4 bank_mask:0xe\n" + "v_add_f32 %1 %1 %1 row_shr:4 bank_mask:0xe\n" + "s_nop 0\n" + "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n" + "v_add_f32 %1 %1 %1 row_shr:8 bank_mask:0xc\n" + "s_nop 0\n" + "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n" + "v_add_f32 %1 %1 %1 row_bcast:15 row_mask:0xa\n" + "s_nop 0\n" + "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n" + "v_add_f32 %1 %1 %1 row_bcast:31 row_mask:0xc\n" + "s_nop 0" + : "=v"(temp_sum1), "=v"(temp_sum2) + : "0"(temp_sum1), "1"(temp_sum2)); +} + template __forceinline__ __device__ void gcn_reduce2(FloatAccum& x, FloatAccum& y, @@ -188,12 +210,10 @@ __forceinline__ __device__ void gcn_reduce2(FloatAccum& x, FloatAccum (&lcl_data_y)[SizeLclData], unsigned int lid) { - const unsigned int ldsidx = lid / warpSize; - constexpr unsigned long long mask = 0xFFFFFFFFFFFFFFFFull; - x = __reduce_add_sync(mask, x); - y = __reduce_add_sync(mask, y); + const unsigned int ldsidx = lid >> 6; + dpp_interleaved_reduction(x, y); // Last thread - if((lid % warpSize) == warpSize - 1) + if((lid % 64) == 63) { lcl_data_x[ldsidx] = x; lcl_data_y[ldsidx] = y; diff --git a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp index c54d7459b05..0f0cd6572ec 100644 --- a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp +++ b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp @@ -22,7 +22,7 @@ defined(CK_AMD_GPU_GFX1036) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || \ defined(CK_AMD_GPU_GFX1151) || defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || \ - defined(CK_AMD_GPU_GFX1200) || defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250)) + defined(CK_AMD_GPU_GFX1200) || defined(CK_AMD_GPU_GFX1201)) #error No CK_AMD_GPU_GFX* macro defined. Exactly one target must be defined. #endif @@ -43,7 +43,7 @@ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || \ defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || defined(CK_AMD_GPU_GFX1151) || \ defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || defined(CK_AMD_GPU_GFX1200) || \ - defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250) + defined(CK_AMD_GPU_GFX1201) #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 #endif @@ -55,7 +55,7 @@ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || \ defined(CK_AMD_GPU_GFX1103) || defined(CK_AMD_GPU_GFX1150) || defined(CK_AMD_GPU_GFX1151) || \ defined(CK_AMD_GPU_GFX1152) || defined(CK_AMD_GPU_GFX1153) || defined(CK_AMD_GPU_GFX1200) || \ - defined(CK_AMD_GPU_GFX1201) || defined(CK_AMD_GPU_GFX1250) + defined(CK_AMD_GPU_GFX1201) #define CK_USE_AMD_V_FMAC_F32 #define CK_USE_AMD_V_DOT2_F32_F16 #define CK_USE_AMD_V_DOT4_I32_I8 diff --git a/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp b/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp index cd4f8e381fb..b67f5ffed6b 100644 --- a/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_per_activation.cpp @@ -121,7 +121,6 @@ BnBwdTrainingPerActivation::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, - {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); diff --git a/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp b/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp index 4207b1ba0a1..45921966790 100644 --- a/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_per_activation_fused.cpp @@ -135,9 +135,7 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, kernel.g_wk = {xgridsize, ygridsize, zgridsize}; - auto const waveSize = handle.GetWavefrontWidth(); - - unsigned int ldsgcn = xlocalsize / waveSize; + unsigned int ldsgcn = xlocalsize / 64; unsigned int ldsnogcn = xlocalsize; int variant = 0; @@ -158,7 +156,7 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, const auto& activ_op = dynamic_cast(*problem.fusion_plan_desc->op_map[1]); - auto build_params = KernelBuildParameters{ + const auto build_params = KernelBuildParameters{ {"MIO_BN_N", static_cast(n)}, {"MIO_BN_NCHW", static_cast(n * c * h * w)}, {"MIO_BN_NHW", static_cast(n * h * w)}, @@ -174,18 +172,12 @@ ConvSolution BnBwdTrgActivationFused::GetSolution(const FusionContext& context, {"MIO_BN_GFX110X", static_cast(StartsWith(handle.GetDeviceName(), "gfx110"))}, {"MIO_BN_GFX115X", static_cast(StartsWith(handle.GetDeviceName(), "gfx115"))}, {"MIO_BN_GFX120X", static_cast(StartsWith(handle.GetDeviceName(), "gfx120"))}, - {"MIO_BN_GFX125X", static_cast(StartsWith(handle.GetDeviceName(), "gfx125"))}, {"MIOPEN_NRN_OP_ID", static_cast(activ_op.activMode)}, {"MIOPEN_USE_FP16", static_cast(dtype == miopenHalf)}, {"MIOPEN_USE_FP32", static_cast(dtype == miopenFloat)}, {"DATA_TYPE", data_type}}; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); - if(mode == miopenBNSpatial) - { - build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); - } - result.construction_params.push_back(kernel); } diff --git a/projects/miopen/src/solver/batchnorm/backward_spatial.cpp b/projects/miopen/src/solver/batchnorm/backward_spatial.cpp index a255f007185..c7368a8d673 100644 --- a/projects/miopen/src/solver/batchnorm/backward_spatial.cpp +++ b/projects/miopen/src/solver/batchnorm/backward_spatial.cpp @@ -340,11 +340,8 @@ ConvSolution BnBwdTrainingSpatial::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, - {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; - build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); - kernel.comp_options = build_params.GenerateFor(kbp::HIP()); kernel.l_wk.push_back(xlocalsize); diff --git a/projects/miopen/src/solver/batchnorm/forward_inference.cpp b/projects/miopen/src/solver/batchnorm/forward_inference.cpp index 619c62fc856..253a4f0e6d6 100644 --- a/projects/miopen/src/solver/batchnorm/forward_inference.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_inference.cpp @@ -162,7 +162,6 @@ ConvSolution BnFwdInference::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, - {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, {"MIO_LAYOUT_NHWC", static_cast(problem.IsLayoutNHWC())}, {"MIO_BN_VECTORIZE", static_cast(vectorsize > 1)}, {"MIO_BN_VEC_SIZE", vectorsize}, diff --git a/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp b/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp index 74552763d6e..a18885df691 100644 --- a/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_per_activation.cpp @@ -120,7 +120,6 @@ BnFwdTrainingPerActivation::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, - {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, }; auto kernel = KernelInfo{}; diff --git a/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp b/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp index dfb2c17f33b..016f5274b06 100644 --- a/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_per_activation_fused.cpp @@ -175,7 +175,6 @@ ConvSolution BnFwdTrgActivationFused::GetSolution(const FusionContext& context, {"MIO_BN_GFX110X", static_cast(StartsWith(handle.GetDeviceName(), "gfx110"))}, {"MIO_BN_GFX115X", static_cast(StartsWith(handle.GetDeviceName(), "gfx115"))}, {"MIO_BN_GFX120X", static_cast(StartsWith(handle.GetDeviceName(), "gfx120"))}, - {"MIO_BN_GFX125X", static_cast(StartsWith(handle.GetDeviceName(), "gfx125"))}, {"MIOPEN_YES_ACTIV", static_cast(1)}, {"MIOPEN_NRN_OP_ID", static_cast(activ_op.activMode)}, {"MIOPEN_USE_FP16", static_cast(input_desc.GetType() == miopenHalf)}, diff --git a/projects/miopen/src/solver/batchnorm/forward_spatial.cpp b/projects/miopen/src/solver/batchnorm/forward_spatial.cpp index e3789fd98d0..3a36c32b80d 100644 --- a/projects/miopen/src/solver/batchnorm/forward_spatial.cpp +++ b/projects/miopen/src/solver/batchnorm/forward_spatial.cpp @@ -227,8 +227,6 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, int stash_method = 0; size_t nelements = 1; - auto const waveSize = handle.GetWavefrontWidth(); - GetVariantFromKernelId( config.kernel_id, variant, vectorsize, xlocalsize, ylocalsize, zlocalsize, nelements); @@ -242,7 +240,7 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, xlocalsize = 256; } xgridsize = c * xlocalsize; - ldsgcn = xlocalsize / waveSize; + ldsgcn = xlocalsize / 64; ldsnogcn = xlocalsize; } else @@ -284,7 +282,7 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, (xlocalsize * ylocalsize * zlocalsize) / xlocalsize_final / zlocalsize_final; } ldsnogcn = xlocalsize * ylocalsize * zlocalsize; - ldsgcn = xlocalsize * ylocalsize * zlocalsize / waveSize; + ldsgcn = xlocalsize * ylocalsize * zlocalsize / 64; } auto result = ConvSolution{miopenStatusSuccess}; @@ -318,7 +316,6 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, {"MIO_BN_GFX110X", (StartsWith(handle.GetDeviceName(), "gfx110") ? "1" : "0")}, {"MIO_BN_GFX115X", (StartsWith(handle.GetDeviceName(), "gfx115") ? "1" : "0")}, {"MIO_BN_GFX120X", (StartsWith(handle.GetDeviceName(), "gfx120") ? "1" : "0")}, - {"MIO_BN_GFX125X", (StartsWith(handle.GetDeviceName(), "gfx125") ? "1" : "0")}, {"MIO_LAYOUT_NHWC", static_cast(problem.IsLayoutNHWC())}, {"MIO_BN_VECTORIZE", static_cast(vectorsize > 1)}, {"MIO_BN_VEC_SIZE", vectorsize}, @@ -331,8 +328,6 @@ ConvSolution BnFwdTrainingSpatial::GetSolution(const ExecutionContext& context, build_params.Define("MIO_BN_CHW", in_nstride); build_params.Define("MIO_BN_NCHW", in_nchw); - build_params.Define("HIP_ENABLE_EXTRA_WARP_SYNC_TYPES"); - kernel.kernel_file = "MIOpenBatchNormFwdTrainSpatial.cpp"; std::string kernel_name = "MIOpenBatchNormFwdTrainSpatial"; kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); diff --git a/projects/miopen/src/solver/conv/conv_winoRxS.cpp b/projects/miopen/src/solver/conv/conv_winoRxS.cpp index 8b51f4258a4..6bd58c4bd59 100644 --- a/projects/miopen/src/solver/conv/conv_winoRxS.cpp +++ b/projects/miopen/src/solver/conv/conv_winoRxS.cpp @@ -694,12 +694,12 @@ static bool IsApplicableBase(const ExecutionContext& ctx, const ProblemDescripti const auto name = ctx.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10") || StartsWith(name, "gfx11") || - StartsWith(name, "gfx120"))) + StartsWith(name, "gfx12"))) return false; if(problem.IsFp16() && !(name == "gfx906" || name == "gfx908" || name == "gfx90a" || name == "gfx942" || StartsWith(name, "gfx95") || name == "gfx1011" || name == "gfx1012" || - StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx120"))) + StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx12"))) return false; if(name == "gfx90a" && problem.IsGfx90aFp16altRequired()) @@ -868,7 +868,7 @@ ConvSolution ConvBinWinoRxS::GetSolution( const auto is_gfx9 = StartsWith(name, "gfx9"); const auto is_gfx10 = StartsWith(name, "gfx10"); const auto is_gfx11 = StartsWith(name, "gfx11"); - const auto is_gfx12 = StartsWith(name, "gfx120"); + const auto is_gfx12 = StartsWith(name, "gfx12"); const auto is_v21 = IsWinogradV21Preferred(name, problem); size_t wg_size = is_gfx9 ? 512 : 256; diff --git a/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp b/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp index edb3fb31459..a3ff18c84ee 100644 --- a/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp +++ b/projects/miopen/src/solver/conv/conv_wino_fury_RxS.cpp @@ -326,7 +326,7 @@ class ShaderModelFactory { return std::make_unique(args, cu_count, n_groups, reduced_vgpr_mem); } - else if(StartsWith(dev_name, "gfx120")) + else if(StartsWith(dev_name, "gfx12")) { return std::make_unique(args, cu_count, n_groups, reduced_vgpr_mem); } @@ -389,8 +389,8 @@ bool ConvWinoFuryRxSCommon::IsApplicable(const ExecutionCo return false; const auto dev_name = ctx.GetStream().GetDeviceName(); - // All gfx11/gfx120x ASICs are supported - if(!(StartsWith(dev_name, "gfx11") || StartsWith(dev_name, "gfx120"))) + // All gfx11/gfx12 ASICs are supported + if(!(StartsWith(dev_name, "gfx11") || StartsWith(dev_name, "gfx12"))) return false; #if WORKAROUND_ISSUE_3044 if(dev_name == "gfx1103") @@ -520,7 +520,7 @@ ConvWinoFuryRxSCommon::GetSolution(const ExecutionContext& std::string kernel_arch = "_gfx11"; const bool is_gfx11 = StartsWith(dev_name, "gfx11"); - const bool is_gfx12 = StartsWith(dev_name, "gfx120"); + const bool is_gfx12 = StartsWith(dev_name, "gfx12"); if(!is_gfx11 && !is_gfx12) MIOPEN_THROW(miopenStatusInternalError); diff --git a/projects/miopen/src/solver/conv_winoRxS_fused.cpp b/projects/miopen/src/solver/conv_winoRxS_fused.cpp index 3bf170327e1..51f0e906e29 100644 --- a/projects/miopen/src/solver/conv_winoRxS_fused.cpp +++ b/projects/miopen/src/solver/conv_winoRxS_fused.cpp @@ -155,13 +155,13 @@ bool ConvBinWinogradRxSf2x3g1Fused::IsApplicable(const FusionContext& context, const std::string name = conv_ctx.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10") || StartsWith(name, "gfx11") || - StartsWith(name, "gfx120"))) + StartsWith(name, "gfx12"))) return false; if(conv_problem.IsFp16() && !(StartsWith(name, "gfx906") || StartsWith(name, "gfx908") || StartsWith(name, "gfx90a") || StartsWith(name, "gfx942") || StartsWith(name, "gfx1011") || StartsWith(name, "gfx1012") || - StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx120"))) + StartsWith(name, "gfx103") || StartsWith(name, "gfx11") || StartsWith(name, "gfx12"))) return false; // clang-format off @@ -208,7 +208,7 @@ ConvSolution ConvBinWinogradRxSf2x3g1Fused::GetSolution(const FusionContext& con const auto is_gfx9 = StartsWith(name, "gfx9"); const auto is_gfx10 = StartsWith(name, "gfx10"); const auto is_gfx11 = StartsWith(name, "gfx11"); - const auto is_gfx12 = StartsWith(name, "gfx120"); + const auto is_gfx12 = StartsWith(name, "gfx12"); const auto is_v21 = IsWinogradV21Preferred<2, 3>(name, conv_problem); size_t wg_size = is_gfx9 ? 512 : 256; kernel.g_wk.push_back(wg_size * n_groups); diff --git a/projects/miopen/src/solver/mha/mha_common.hpp b/projects/miopen/src/solver/mha/mha_common.hpp index 7ca1f1341b3..d44a672890c 100644 --- a/projects/miopen/src/solver/mha/mha_common.hpp +++ b/projects/miopen/src/solver/mha/mha_common.hpp @@ -40,7 +40,6 @@ #include #if MIOPEN_ROCBLAS_VERSION_FLAT < 2045000 #include -#define USE_ROCBLAS_EX3 0 #else #include /// rocblas_gemm_ex3 supports F8 datatypes. From 1140ba1a43296b22d2c1983d85ceca2403c371a3 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 15:40:03 -0400 Subject: [PATCH 80/97] Revert descriptor.cpp.j2 template change Reverts the codegen template; underlying generated .cpp files were already reverted to PR merge-base in prior commit. Patch saved to WIP/worktrees/rocm-libraries/python_binding_tests/descriptor-j2.patch. Co-Authored-By: Claude Opus 4.7 --- .../tools/DescriptorGenerator/templates/descriptor.cpp.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 b/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 index c1ab7732cd4..3be26a2e9ae 100644 --- a/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 +++ b/projects/hipdnn/tools/DescriptorGenerator/templates/descriptor.cpp.j2 @@ -452,7 +452,7 @@ std::string {{ op.class_name }}::toString() const str += ", compute_data_type="; str += {{ op.fbs_namespace }}::EnumNameDataType(_computeDataType); {% endif %} - str += '}'; + str += "}"; return str; } From 20de809456c236ec539b68c35b587b2304db4f92 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 15:46:50 -0400 Subject: [PATCH 81/97] tmp: bump TheRock pin to 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed Temporary CI hash bump for testing. Revert before merge. Co-Authored-By: Claude Opus 4.7 --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index ad2e582cf94..457247fc899 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "974db7092f493a7ffbcffaa393ca66d67d12bd13" # 2026-05-18 + value: "393a062fca21cd0054a5184ccf02fa9d9fe4d9ed" # tmp bump docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index b1e36d79fa0..e085db7513e 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 937a4e098af..f6737ce6810 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From ffc2a4c0d3f47d0b8804182059e192dd611d2438 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 15:51:20 -0400 Subject: [PATCH 82/97] tmp: bump remaining inline TheRock pins to 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed Co-Authored-By: Claude Opus 4.7 --- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index d498e94d861..ec63b399dc5 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index 4d663f65979..9770aa2da27 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 38bcbba392b..5e88e0e376e 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 3eca8b89cda..8728b833466 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: e44b42bbcc7452328ccf12bcac319bda5ede4a6a # 2026-05-20 commit + ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From f6911fc0b62217a190bea2bee39c0b230bf387e2 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 16:02:38 -0400 Subject: [PATCH 83/97] ci: drop --include-iree-libs flag (removed in TheRock 393a062) --- .github/workflows/therock-ci-linux.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index ec63b399dc5..6a837a9d61c 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -92,8 +92,7 @@ jobs: - name: Fetch sources timeout-minutes: 30 run: | - # contains() matches the literal "=THEROCK_ENABLE_IREE_LIBS=ON" emitted by therock_matrix.py - ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-libraries --no-include-ml-frameworks ${{ contains(inputs.cmake_options, 'THEROCK_ENABLE_IREE_LIBS=ON') && '--include-iree-libs' || '' }} + ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-libraries --no-include-ml-frameworks - name: Configure Projects env: From da6cf750d2445eea995ff5f713ff6053928f99d5 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 18:34:13 -0400 Subject: [PATCH 84/97] ci: move rocm-libraries checkout to external-sources/ for hipdnn python bindings test The hipdnn_python_bindings test script in TheRock expects rocm-libraries at external-sources/rocm-libraries/ (treated as a submodule of TheRock). Update the test-component workflow checkout path and the notify_teams script reference to match the expected layout. --- .github/workflows/therock-test-component.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index e085db7513e..0c89aafd877 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -79,7 +79,7 @@ jobs: - name: Checkout rocm-libraries repository for scripts uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: - path: rocm-libraries + path: external-sources/rocm-libraries - name: Run setup test environment workflow timeout-minutes: 15 @@ -113,7 +113,7 @@ jobs: - name: Notify Teams Channel on Failure if: ${{ failure() && !github.event.pull_request.head.repo.fork }} run: | - python3 rocm-libraries/.github/scripts/notify_teams.py \ + python3 external-sources/rocm-libraries/.github/scripts/notify_teams.py \ --failure-stage test \ --log-path ./test_logs/test_output.log \ --webhook-urls '{"miopen":"${{ secrets.MIOPEN_CI_WEBHOOK_URL }}","hipdnn":"${{ secrets.HIPDNN_CI_WEBHOOK_URL }}"}' \ From d71e9b47b128ab9cb01728752957d67cd5c45900 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Wed, 27 May 2026 18:39:31 -0400 Subject: [PATCH 85/97] ci: bump TheRock pin to 64985547315d for hipdnn python bindings path fix Reverts the external-sources/ workflow checkout path added in da6cf750d24 and bumps the TheRock pin to pick up the script-side fix (TheRock@649855473) that resolves the hipDNN pytest dir via HIPDNN_PYTHON_TESTS_DIR / ROCM_LIBRARIES_DIR / sibling rocm-libraries checkout instead of the hard-coded external-sources/ submodule path. --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 6 +++--- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index 457247fc899..0e13e83d202 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "393a062fca21cd0054a5184ccf02fa9d9fe4d9ed" # tmp bump + value: "64985547315d38cc0163afe8a60b014facf51b9f" # tmp bump docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 6a837a9d61c..1ed4a8e00d7 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index 9770aa2da27..be9b0e5429b 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 5e88e0e376e..2d513af1f5b 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 8728b833466..cb03a8f8180 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index 0c89aafd877..9e033ef2c00 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} @@ -79,7 +79,7 @@ jobs: - name: Checkout rocm-libraries repository for scripts uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: - path: external-sources/rocm-libraries + path: rocm-libraries - name: Run setup test environment workflow timeout-minutes: 15 @@ -113,7 +113,7 @@ jobs: - name: Notify Teams Channel on Failure if: ${{ failure() && !github.event.pull_request.head.repo.fork }} run: | - python3 external-sources/rocm-libraries/.github/scripts/notify_teams.py \ + python3 rocm-libraries/.github/scripts/notify_teams.py \ --failure-stage test \ --log-path ./test_logs/test_output.log \ --webhook-urls '{"miopen":"${{ secrets.MIOPEN_CI_WEBHOOK_URL }}","hipdnn":"${{ secrets.HIPDNN_CI_WEBHOOK_URL }}"}' \ diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index f6737ce6810..c4c3e7be1a6 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 393a062fca21cd0054a5184ccf02fa9d9fe4d9ed # tmp bump + ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From 1b20c415d30e064fc89db555af3b0a6bf675b940 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 10:38:11 -0400 Subject: [PATCH 86/97] ci: bump TheRock pin to 9fdb394fb7 for test venv pip fix --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index 0e13e83d202..023400acf84 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "64985547315d38cc0163afe8a60b014facf51b9f" # tmp bump + value: "9fdb394fb7691eeab17014b47e6423d158bf405e" # tmp bump docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 1ed4a8e00d7..58b33b66bfb 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index be9b0e5429b..0b436948dcb 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 2d513af1f5b..1ad4005e4f6 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index cb03a8f8180..1ac03ffb678 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index 9e033ef2c00..9120cc24a62 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index c4c3e7be1a6..6b205140c6f 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 64985547315d38cc0163afe8a60b014facf51b9f # tmp bump + ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From acd24cff7d42a953b0749aa0086129dedb6970c1 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 10:44:31 -0400 Subject: [PATCH 87/97] ci: bump TheRock pin to ecd0aa8fb for uv pip wheel fix --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index 023400acf84..578e71bcf29 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "9fdb394fb7691eeab17014b47e6423d158bf405e" # tmp bump + value: "ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888" # tmp bump docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 58b33b66bfb..b0f60146f18 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index 0b436948dcb..ae7c553e57d 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 1ad4005e4f6..80ca1524e43 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 1ac03ffb678..b2e1d1dc05d 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index 9120cc24a62..f243eec0dcb 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 6b205140c6f..58ee59dcaf2 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 9fdb394fb7691eeab17014b47e6423d158bf405e # tmp bump + ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From ec1ab5da8c79f4535107d274d56ed072ea9a459e Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 08:49:21 -0600 Subject: [PATCH 88/97] Potential fix for pull request finding 'Unused import' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --- projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py index 7a17020b04c..8be730bdda0 100644 --- a/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py +++ b/projects/hipdnn/python/hipdnn_frontend/test/test_conv_fprop.py @@ -6,8 +6,6 @@ import numpy as np import pytest -import hipdnn_frontend as hipdnn - from .helpers import build_conv_fprop_graph, execute_graph # Dimensions used across tests From e8f6fb4b5b310ec99cd1f3526c30083f49f616da Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 11:56:32 -0400 Subject: [PATCH 89/97] [hipDNN] Install Python pytest files into share/hipdnn/tests/python Stage hipdnn_frontend/test/*.py into share/hipdnn/tests/python so downstream packagers (e.g. TheRock) can include them in the hipDNN test artifact. Keeps tests outside the package payload to avoid accidentally testing the staged tree instead of the installed wheel. Co-Authored-By: Claude Opus 4.7 --- projects/hipdnn/python/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 983d0f7a6fd..39599c6d245 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -59,4 +59,7 @@ else() install(TARGETS hipdnn_frontend_python DESTINATION "${_staging}") install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/__init__.py" DESTINATION "${_staging}") + install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/test/" + DESTINATION "share/hipdnn/tests/python" + FILES_MATCHING PATTERN "*.py") endif() From 4874b136e36559bc6919ec6b543934e0deb74497 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 11:59:07 -0400 Subject: [PATCH 90/97] ci: bump TheRock pin to b8a0e8bc8 for artifact-sourced Python tests Co-Authored-By: Claude Opus 4.7 --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index 578e71bcf29..376175c17a7 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888" # tmp bump + value: "b8a0e8bc8999356b44a57719ac7f29986ad28128" # tmp bump docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index b0f60146f18..0700e2bbd17 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index ae7c553e57d..594eeb8446b 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 80ca1524e43..d8c3f1b120a 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index b2e1d1dc05d..32470d318b6 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index f243eec0dcb..c529a68bb03 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 58ee59dcaf2..8ca40b5ba09 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: ecd0aa8fb8949bf41e793d3bac4e7d6c1f62a888 # tmp bump + ref: b8a0e8bc8999356b44a57719ac7f29986ad28128 # tmp bump - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From bd546f206ea4c78f3a782c58dc3a9bb7a42ffdb1 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 13:18:34 -0400 Subject: [PATCH 91/97] revert: drop ai-rules.md TheRock CI section from this PR --- projects/hipdnn/docs/ai-rules.md | 57 -------------------------------- 1 file changed, 57 deletions(-) diff --git a/projects/hipdnn/docs/ai-rules.md b/projects/hipdnn/docs/ai-rules.md index b6084586780..c04a4788db7 100644 --- a/projects/hipdnn/docs/ai-rules.md +++ b/projects/hipdnn/docs/ai-rules.md @@ -152,63 +152,6 @@ When requested to build/test: --- -## CI System (TheRock) - -hipDNN CI runs through [TheRock](https://github.com/ROCm/TheRock), AMD's unified ROCm build system. GitHub Actions check out TheRock at a pinned commit hash, then TheRock builds and tests hipDNN as one of its components. - -### TheRock Hash Pin - -The TheRock commit hash is pinned in `.github/actions/ci-env/action.yml` (the `therock-ref` output). All CI workflows read from this single source of truth. When updating the TheRock pin, also update the inline hash in `.github/workflows/therock-test-packages.yml` and `.github/workflows/therock-test-component.yml` which have secondary copies. - -### CI Build Flow - -1. **Checkout**: GitHub Actions checks out TheRock at the pinned hash into a `TheRock/` subdirectory alongside the rocm-libraries checkout. -2. **DVC pull**: `dvc pull -v` hydrates large binary files (MIOpen kernel DBs, benchmarking workloads) from the S3 remote (`s3://therock-dvc/rocm-libraries`). -3. **Source fetch**: `TheRock/build_tools/fetch_sources.py` pre-fetches all third-party sources TheRock needs. This runs before CMake configure so the build is fully offline afterward. -4. **Configure**: CMake configures with `THEROCK_ROCM_LIBRARIES_SOURCE_DIR` pointing to the local rocm-libraries checkout and project-specific flags (e.g., `THEROCK_ENABLE_HIPDNN_INTEGRATION_TESTS`). -5. **Build**: `cmake --build TheRock/build --target therock-archives therock-dist`. -6. **Upload**: Built artifacts are uploaded to S3 so GPU test runners can download them. -7. **Test**: A separate GPU runner downloads the artifacts and runs component test scripts inside a container. - -### 3rd Party Dependencies and FetchContent - -**All 3rd party dependencies are pre-staged from S3 in CI — do not add CMake `FetchContent` calls that download from GitHub at configure time.** TheRock's `fetch_sources.py` handles dependency fetching before CMake runs. A `FetchContent` that hits GitHub during the build will fail in CI. - -hipDNN's `cmake/Dependencies.cmake` has a `HIPDNN_NO_DOWNLOAD` option that sets `FETCHCONTENT_FULLY_DISCONNECTED`. Dependencies declared via `FetchContent` (GTest, flatbuffers, spdlog, nlohmann_json, tsl-robin-map, nanobind) must also be findable as pre-installed packages so TheRock can supply them. - -When adding a new dependency: -- Add it to `cmake/Dependencies.cmake` with `find_package()` first, `FetchContent` as fallback -- Coordinate with TheRock to ensure the dependency is included in `fetch_sources.py` -- The S3 DVC remote (`s3://therock-dvc/rocm-libraries`) stores large binary assets, not source dependencies - -### CI Workflow Files - -| Workflow | Purpose | -|----------|---------| -| `therock-ci.yml` | Top-level orchestrator for PR/push CI | -| `therock-ci-linux.yml` | Reusable Linux build+test workflow | -| `therock-ci-windows.yml` | Reusable Windows build+test workflow | -| `therock-test-packages.yml` | Per-component test matrix fan-out | -| `therock-test-component.yml` | Individual test component with sharding | -| `hipdnn-superbuild-ci.yml` | Superbuild CI using pre-built ROCm wheels (no TheRock source build) | -| `hipdnn-python-tests.yml` | Python binding tests | - -### hipDNN CMake Flags in TheRock CI - -These flags are set by `.github/scripts/therock_matrix.py` when hipDNN changes are detected: - -``` -THEROCK_ENABLE_HIPBLASLTPROVIDER=ON -THEROCK_ENABLE_HIPKERNELPROVIDER=ON -THEROCK_ENABLE_MIOPENPROVIDER=ON -THEROCK_ENABLE_HIPDNN_SAMPLES=ON -THEROCK_ENABLE_COMPOSABLE_KERNEL=ON -THEROCK_ENABLE_HIPDNN_INTEGRATION_TESTS=ON -THEROCK_ENABLE_IREE_LIBS=ON -``` - ---- - ## C++ Code Style ### Naming Conventions From e82d72a1e841f91f0b3c5b273c843d3e186791e8 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 13:41:23 -0400 Subject: [PATCH 92/97] remove unused assemble_wheel.py TheRock CI uses its own pack_frontend_wheel.py for wheel packaging. The local CMake build only stages files for TheRock to consume and never invokes assemble_wheel.py. --- .../hipdnn/python/scripts/assemble_wheel.py | 185 ------------------ 1 file changed, 185 deletions(-) delete mode 100644 projects/hipdnn/python/scripts/assemble_wheel.py diff --git a/projects/hipdnn/python/scripts/assemble_wheel.py b/projects/hipdnn/python/scripts/assemble_wheel.py deleted file mode 100644 index 09977f53218..00000000000 --- a/projects/hipdnn/python/scripts/assemble_wheel.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -"""Assemble a Python wheel from pre-built artifacts (stdlib only).""" - -import argparse -import base64 -import csv -import hashlib -import io -import os -import re -import stat -import sys -import zipfile - -_PACKAGE_NAME = "hipdnn_frontend" -_DEFAULT_VERSION = "0.1.0" -_REQUIRES_DIST = ["numpy>=1.19.0"] - - -def _parse_so_tags(so_filename): - """Extract Python, ABI, and platform tags from a .so filename. - - Example: hipdnn_frontend_python.cpython-312-x86_64-linux-gnu.so - -> ('cp312', 'cp312', 'linux_x86_64') - """ - m = re.search( - r"\.cpython-(\d+)([a-z]*)-(.+)\.so$", - so_filename, - ) - if not m: - sys.exit(f"Cannot parse tags from filename: {so_filename}") - - ver, flags, platform_raw = m.groups() - py_tag = f"cp{ver}" - abi_tag = f"cp{ver}{flags}" - platform_tag = _gnu_triplet_to_wheel_tag(platform_raw) - return py_tag, abi_tag, platform_tag - - -def _gnu_triplet_to_wheel_tag(triplet): - """Convert a GNU triplet (from .so filename) to a PEP 425 platform tag. - - x86_64-linux-gnu -> linux_x86_64 - aarch64-linux-gnu -> linux_aarch64 - """ - parts = triplet.split("-") - if len(parts) >= 2 and parts[1] == "linux": - return f"linux_{parts[0]}" - return triplet.replace("-", "_").replace(".", "_") - - -def _hash_record(data): - """Return 'sha256=,' for a RECORD entry.""" - digest = hashlib.sha256(data).digest() - b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") - return f"sha256={b64}", len(data) - - -def _get_init_file(package_dir): - """Return (arcname, filepath) for __init__.py, or None if missing.""" - init_path = os.path.join(package_dir, "__init__.py") - if os.path.isfile(init_path): - return f"{_PACKAGE_NAME}/__init__.py", init_path - return None - - -def _build_metadata(version): - return ( - f"Metadata-Version: 2.1\n" - f"Name: {_PACKAGE_NAME.replace('_', '-')}\n" - f"Version: {version}\n" - f"Summary: Python bindings for the hipDNN frontend library\n" - f"Author: Advanced Micro Devices, Inc.\n" - f"License: MIT\n" - f"Requires-Python: >=3.8\n" - + "".join(f"Requires-Dist: {dep}\n" for dep in _REQUIRES_DIST) - ) - - -def _build_wheel_metadata(py_tag, abi_tag, platform_tag): - return ( - f"Wheel-Version: 1.0\n" - f"Generator: assemble_wheel.py\n" - f"Root-Is-Purelib: false\n" - f"Tag: {py_tag}-{abi_tag}-{platform_tag}\n" - ) - - -def assemble(so_path, package_dir, output_dir, version): - so_filename = os.path.basename(so_path) - py_tag, abi_tag, platform_tag = _parse_so_tags(so_filename) - - dist_info = f"{_PACKAGE_NAME}-{version}.dist-info" - wheel_name = f"{_PACKAGE_NAME}-{version}-{py_tag}-{abi_tag}-{platform_tag}.whl" - wheel_path = os.path.join(output_dir, wheel_name) - - records = [] - - with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as whl: - # 1. __init__.py - init_entry = _get_init_file(package_dir) - if init_entry: - arcname, filepath = init_entry - with open(filepath, "rb") as f: - data = f.read() - whl.writestr(arcname, data) - h, sz = _hash_record(data) - records.append((arcname, h, sz)) - - # 2. Compiled extension — use ZIP_STORED for the .so (no compression benefit) - so_arcname = f"{_PACKAGE_NAME}/{so_filename}" - with open(so_path, "rb") as f: - so_data = f.read() - info = zipfile.ZipInfo(so_arcname) - info.compress_type = zipfile.ZIP_STORED - info.external_attr = ( - stat.S_IRUSR - | stat.S_IWUSR - | stat.S_IXUSR - | stat.S_IRGRP - | stat.S_IXGRP - | stat.S_IROTH - | stat.S_IXOTH - ) << 16 - whl.writestr(info, so_data) - h, sz = _hash_record(so_data) - records.append((so_arcname, h, sz)) - - # 3. dist-info/METADATA - metadata = _build_metadata(version).encode() - arcname = f"{dist_info}/METADATA" - whl.writestr(arcname, metadata) - h, sz = _hash_record(metadata) - records.append((arcname, h, sz)) - - # 4. dist-info/WHEEL - wheel_meta = _build_wheel_metadata(py_tag, abi_tag, platform_tag).encode() - arcname = f"{dist_info}/WHEEL" - whl.writestr(arcname, wheel_meta) - h, sz = _hash_record(wheel_meta) - records.append((arcname, h, sz)) - - # 5. dist-info/top_level.txt - top_level = f"{_PACKAGE_NAME}\n".encode() - arcname = f"{dist_info}/top_level.txt" - whl.writestr(arcname, top_level) - h, sz = _hash_record(top_level) - records.append((arcname, h, sz)) - - # 6. dist-info/RECORD (must be last — its own entry has no hash) - buf = io.StringIO() - writer = csv.writer(buf) - for row in records: - writer.writerow(row) - writer.writerow((f"{dist_info}/RECORD", "", "")) - record_data = buf.getvalue().encode() - whl.writestr(f"{dist_info}/RECORD", record_data) - - print(f"Wheel written: {wheel_path}") - return wheel_path - - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--so-path", required=True, help="Path to compiled .so") - parser.add_argument( - "--package-dir", required=True, help="Path to pure-Python package dir" - ) - parser.add_argument("--output-dir", required=True, help="Directory for output .whl") - parser.add_argument("--version", default=_DEFAULT_VERSION, help="Package version") - args = parser.parse_args() - - if not os.path.isfile(args.so_path): - sys.exit(f"Extension not found: {args.so_path}") - if not os.path.isdir(args.package_dir): - sys.exit(f"Package directory not found: {args.package_dir}") - os.makedirs(args.output_dir, exist_ok=True) - - assemble(args.so_path, args.package_dir, args.output_dir, args.version) - - -if __name__ == "__main__": - main() From f6afa86c3be56eae6d6995fb010baf01cd844618 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 14:12:59 -0400 Subject: [PATCH 93/97] ci: bump TheRock pin to 32329200459a --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index 5bc57e2054a..b83cbbd764c 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "09796322b89aa5239d03700c4ad3976da6cc689d" + value: "32329200459aa4396127a53560e1f454d136d706" docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index 24f76cb8078..3a26e3063a9 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Install python deps run: | diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index e31261babb3..16112065ce5 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index 97c0455e6e3..f408093724a 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 9a63935057f..8372ac78c7f 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index 445b8404d1f..0a106193eec 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Configure git for long paths on Windows if: ${{ runner.os == 'Windows' }} diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 6977486ca93..c0996deaf46 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -43,7 +43,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 09796322b89aa5239d03700c4ad3976da6cc689d + ref: 32329200459aa4396127a53560e1f454d136d706 - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 From 38bdd9c05128d3a2dace0388d6030c518bc6b830 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 14:50:26 -0400 Subject: [PATCH 94/97] ci: retrigger after upstream hipBLASLt Tensile flake From 7fb05649833e6bab4d7ad2e45b0f0938dde3f2e5 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 18:06:26 -0400 Subject: [PATCH 95/97] Revert "[hipDNN] Install Python pytest files into share/hipdnn/tests/python" This reverts commit e8f6fb4b5b310ec99cd1f3526c30083f49f616da. --- projects/hipdnn/python/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 39599c6d245..983d0f7a6fd 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -59,7 +59,4 @@ else() install(TARGETS hipdnn_frontend_python DESTINATION "${_staging}") install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/__init__.py" DESTINATION "${_staging}") - install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/test/" - DESTINATION "share/hipdnn/tests/python" - FILES_MATCHING PATTERN "*.py") endif() From 2624f8e6d8d72c67c1cd461709d07be2b3252f79 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Thu, 28 May 2026 23:34:14 -0400 Subject: [PATCH 96/97] fix: re-install python pytest files into share/hipdnn/tests/python Un-reverts the install rule the TheRock test runner depends on. test_hipdnn_frontend_python.py expects the pytest directory at build/share/hipdnn/tests/python; without it the test job fails with FileNotFoundError. Co-Authored-By: Claude Opus 4.7 --- projects/hipdnn/python/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/projects/hipdnn/python/CMakeLists.txt b/projects/hipdnn/python/CMakeLists.txt index 983d0f7a6fd..39599c6d245 100644 --- a/projects/hipdnn/python/CMakeLists.txt +++ b/projects/hipdnn/python/CMakeLists.txt @@ -59,4 +59,7 @@ else() install(TARGETS hipdnn_frontend_python DESTINATION "${_staging}") install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/__init__.py" DESTINATION "${_staging}") + install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/hipdnn_frontend/test/" + DESTINATION "share/hipdnn/tests/python" + FILES_MATCHING PATTERN "*.py") endif() From e9078c4cf98a2647962307b9dea2c99768734843 Mon Sep 17 00:00:00 2001 From: Thomas Vy Date: Fri, 29 May 2026 18:09:13 -0400 Subject: [PATCH 97/97] ci(hipdnn): bump TheRock ref to wheel-pack fix; fix trailing whitespace Point all TheRock CI pins to 9c0ca0a4a, which fixes the frontend wheel-pack script to use `uv build --wheel` instead of the invalid `uv pip wheel`. Also strip trailing whitespace on the ref line that failed the pre-commit trailing-whitespace hook. Co-Authored-By: Claude Opus 4.7 --- .github/actions/ci-env/action.yml | 2 +- .github/workflows/therock-ci-linux.yml | 2 +- .github/workflows/therock-ci-nightly.yml | 2 +- .github/workflows/therock-ci-windows.yml | 2 +- .github/workflows/therock-ci.yml | 2 +- .github/workflows/therock-test-component.yml | 2 +- .github/workflows/therock-test-packages.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/actions/ci-env/action.yml b/.github/actions/ci-env/action.yml index b83cbbd764c..30f0279d983 100644 --- a/.github/actions/ci-env/action.yml +++ b/.github/actions/ci-env/action.yml @@ -4,7 +4,7 @@ description: "Single source of truth for shared CI constants (TheRock ref, Docke outputs: therock-ref: description: "TheRock commit hash to use" - value: "32329200459aa4396127a53560e1f454d136d706" + value: "9c0ca0a4a6900c6dfb5ede35f2034f303b53433e" docker-image: description: "Docker container image for Linux builds" value: "ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:48492540591673fdc8d51beb89bde41e7ba13cbb528f643c0a481ba42c4058f2" diff --git a/.github/workflows/therock-ci-linux.yml b/.github/workflows/therock-ci-linux.yml index b44c91552c2..272dc22bd3e 100644 --- a/.github/workflows/therock-ci-linux.yml +++ b/.github/workflows/therock-ci-linux.yml @@ -51,7 +51,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Install python deps run: | pip install -r TheRock/requirements.txt diff --git a/.github/workflows/therock-ci-nightly.yml b/.github/workflows/therock-ci-nightly.yml index e868e444304..e2c3ca9c14b 100644 --- a/.github/workflows/therock-ci-nightly.yml +++ b/.github/workflows/therock-ci-nightly.yml @@ -43,7 +43,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.github/workflows/therock-ci-windows.yml b/.github/workflows/therock-ci-windows.yml index a0d9ed22828..76e9b13ddfd 100644 --- a/.github/workflows/therock-ci-windows.yml +++ b/.github/workflows/therock-ci-windows.yml @@ -52,7 +52,7 @@ jobs: with: repository: "ROCm/TheRock" path: "TheRock" - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 diff --git a/.github/workflows/therock-ci.yml b/.github/workflows/therock-ci.yml index 35e36b73835..edb5b5ce30e 100644 --- a/.github/workflows/therock-ci.yml +++ b/.github/workflows/therock-ci.yml @@ -69,7 +69,7 @@ jobs: with: repository: "ROCm/TheRock" path: TheRock - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: diff --git a/.github/workflows/therock-test-component.yml b/.github/workflows/therock-test-component.yml index 6760704e3ee..0ccf337ad45 100644 --- a/.github/workflows/therock-test-component.yml +++ b/.github/workflows/therock-test-component.yml @@ -73,7 +73,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Configure git for long paths on Windows diff --git a/.github/workflows/therock-test-packages.yml b/.github/workflows/therock-test-packages.yml index 03139979536..268a0f53cc5 100644 --- a/.github/workflows/therock-test-packages.yml +++ b/.github/workflows/therock-test-packages.yml @@ -46,7 +46,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: 32329200459aa4396127a53560e1f454d136d706 + ref: 9c0ca0a4a6900c6dfb5ede35f2034f303b53433e - name: Setting up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0