Skip to content

Commit 9a1a725

Browse files
author
zhangyue
committed
build(ascend): support custom kernel builds
1 parent 64751ea commit 9a1a725

7 files changed

Lines changed: 170 additions & 33 deletions

File tree

CMakeLists.txt

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,20 @@ option(WITH_ASCEND "Enable Ascend backend" OFF)
2323

2424
option(WITH_TORCH "Enable PyTorch C++ backend" OFF)
2525

26-
# Default OFF until CANN's `extract_host_stub.py` path handling is fixed for
27-
# `scikit-build-core` temp-dir builds (triggers `KeyError` on the preprocessed
28-
# object path). Enable explicitly with `-DBUILD_CUSTOM_KERNEL=ON` when the
29-
# toolchain is compatible or when building via the standalone
30-
# `src/ascend/custom/build.sh` script.
31-
option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requires `torch_npu`)" OFF)
26+
# Custom `AscendC` kernels under `src/native/ascend/custom/`. `ON` by default
27+
# so CI and routine dev builds always exercise `implementation_index=1/2`
28+
# for `RmsNorm` / `AddRmsNorm`. Gated by `WITH_ASCEND` in
29+
# `src/CMakeLists.txt`, so non-Ascend builds ignore it. Pass
30+
# `-DBUILD_ASCEND_CUSTOM=OFF` to skip the `ccec` build on Ascend
31+
# machines where the custom kernels aren't needed.
32+
#
33+
# When `ON`, `src/CMakeLists.txt` drives the standalone
34+
# `src/native/ascend/custom/build.sh` via a build-phase custom command. This
35+
# sidesteps a `CANN` `extract_host_stub.py` path bug that breaks in-tree
36+
# `ascendc_library()` under `scikit-build-core` temp-dir builds, then links
37+
# the produced `libno_workspace_kernel.a` into the `ops` module with
38+
# `--whole-archive`. Requires `torch_npu` and the `AscendC` toolchain (`ccec`).
39+
option(BUILD_ASCEND_CUSTOM "Build custom AscendC kernels" ON)
3240

3341
option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF)
3442
option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF)

src/CMakeLists.txt

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,70 @@ if(WITH_ASCEND)
262262
list(APPEND DEVICE_LIST "ascend")
263263

264264
# Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`).
265-
if(BUILD_CUSTOM_KERNEL)
266-
add_subdirectory(native/ascend/custom)
265+
if(BUILD_ASCEND_CUSTOM)
266+
# In-tree `ascendc_library()` trips the `CANN` `extract_host_stub.py`
267+
# path-handling bug under `scikit-build-core`'s temp-dir builds
268+
# (`KeyError` on `/./workspace/...` paths in `$<TARGET_OBJECTS>`).
269+
# Work around it by driving the standalone `src/native/ascend/custom/build.sh`;
270+
# that script invokes a separate `cmake` with
271+
# `src/native/ascend/custom/` as its `SOURCE_DIR`, avoiding the buggy
272+
# path shape. The produced `.a` is imported and linked into
273+
# `ops` with `--whole-archive`.
274+
set(_custom_build_dir "${CMAKE_SOURCE_DIR}/build/build_ascend_custom")
275+
set(_custom_lib "${_custom_build_dir}/lib/libno_workspace_kernel.a")
276+
set(_custom_source_dir "${CMAKE_CURRENT_BINARY_DIR}/ascend_custom_source")
277+
278+
if(NOT DEFINED SOC_VERSION OR "${SOC_VERSION}" STREQUAL "")
279+
include(${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/cmake/detect_soc.cmake)
280+
infiniops_detect_soc(SOC_VERSION)
281+
endif()
282+
283+
# Drive `build.sh` as a build-phase target with explicit source
284+
# dependencies so that editing any `op_host/` or `op_kernel/`
285+
# source re-triggers the build (plain `execute_process` at
286+
# configure time would only gate on file existence and leave
287+
# stale `.a` files in place).
288+
# Inputs of the `build.sh` custom command. Besides the kernel/host sources
# and `build.sh` itself, track the sub-build's own `CMakeLists.txt` and
# `*.cmake` helpers so that editing them also re-runs `build.sh` instead of
# leaving a stale `.a` in place.
file(GLOB_RECURSE _custom_srcs CONFIGURE_DEPENDS
289+
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/*.cpp"
290+
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/*.h"
291+
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/build.sh"
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/CMakeLists.txt"
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom/*.cmake")
292+
293+
# Scrub env inherited from the outer `scikit-build-core` invocation
294+
# before handing control to `build.sh`: `CMAKE_GENERATOR` /
295+
# `CMAKE_EXPORT_COMPILE_COMMANDS` leaking into the inner `cmake`
296+
# change the path format passed to `ninja`'s `_host_cpp` rule and
297+
# re-trigger the `CANN` `extract_host_stub.py` `KeyError`
298+
# (`/./workspace/...`) that standalone `build.sh` avoids.
299+
#
300+
# `pip install` MUST be invoked with `--no-build-isolation` on
301+
# Ascend; otherwise pip's build-isolation overlay shadows system
302+
# `torch` (via `PYTHONPATH`) and the inner `cmake`'s
303+
# `import torch` in `config_envs.cmake` fails with
304+
# `ModuleNotFoundError`.
305+
add_custom_command(
306+
OUTPUT ${_custom_lib}
307+
COMMAND ${CMAKE_COMMAND} -E rm -f "${_custom_source_dir}"
308+
COMMAND ${CMAKE_COMMAND} -E create_symlink
309+
"${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom"
310+
"${_custom_source_dir}"
311+
COMMAND ${CMAKE_COMMAND} -E env
312+
--unset=CMAKE_GENERATOR
313+
--unset=CMAKE_EXPORT_COMPILE_COMMANDS
314+
--unset=CMAKE_BUILD_PARALLEL_LEVEL
315+
"BUILD_DIR=${_custom_build_dir}"
316+
"MAIN_SRC_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
317+
bash ${_custom_source_dir}/build.sh ${SOC_VERSION}
318+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/native/ascend/custom
319+
DEPENDS ${_custom_srcs}
320+
COMMENT "Building custom AscendC kernels (SOC_VERSION=${SOC_VERSION})"
321+
VERBATIM)
322+
323+
add_custom_target(no_workspace_kernel_build ALL DEPENDS ${_custom_lib})
324+
325+
add_library(no_workspace_kernel STATIC IMPORTED GLOBAL)
326+
set_target_properties(no_workspace_kernel PROPERTIES
327+
IMPORTED_LOCATION "${_custom_lib}")
328+
add_dependencies(no_workspace_kernel no_workspace_kernel_build)
267329

268330
# Link the compiled `AscendC` kernel objects into `infiniops` so that
269331
# custom kernel implementations (e.g. `RmsNorm` index 1) can call
@@ -651,9 +713,17 @@ if(GENERATE_PYTHON_BINDINGS)
651713
# The `Operator<..., 1>` template instantiations that call
652714
# `aclrtlaunch_*` live in `ops.cc`, so link here with
653715
# `--whole-archive` to ensure all launch functions are available.
654-
if(BUILD_CUSTOM_KERNEL)
716+
# `$<TARGET_FILE>` works for both real `ascendc_library()` targets and
717+
# `IMPORTED` targets pointing at a pre-built `.a`. The
718+
# `no_workspace_kernel` target is only created inside the
719+
# `WITH_ASCEND` block above, so this branch must mirror that gate;
720+
# otherwise non-Ascend builds error out with "No target
721+
# no_workspace_kernel".
722+
if(WITH_ASCEND AND BUILD_ASCEND_CUSTOM)
655723
target_link_libraries(ops PRIVATE
656-
-Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive)
724+
-Wl,--whole-archive $<TARGET_FILE:no_workspace_kernel> -Wl,--no-whole-archive)
725+
# `ops` link step must wait for `build.sh` to produce the `.a`.
726+
add_dependencies(ops no_workspace_kernel_build)
657727
endif()
658728
set(_INFINIOPS_INSTALL_RPATH "$ORIGIN")
659729
if(WITH_TORCH)

src/native/ascend/custom/CMakeLists.txt

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ else()
3030
endif()
3131

3232
set(PROJECT_OP_SRC_BASE ${PROJECT_SOURCE_DIR})
33-
set(PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/build)
34-
set(PROJECT_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/output)
33+
# Default for direct `cmake` invocations: this project lives at
# `src/native/ascend/custom/`, so the main `src/` directory is three levels
# up. `build.sh` passes `-DMAIN_SRC_DIR=...` explicitly and overrides this.
set(MAIN_SRC_DIR "${PROJECT_OP_SRC_BASE}/../../.." CACHE PATH
34+
"Main InfiniOps source directory.")
3535

3636
include(cmake/config_envs.cmake)
3737
include(cmake/config_ascend.cmake)
@@ -43,13 +43,15 @@ if(CCACHE_PROGRAM)
4343
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
4444
endif()
4545

46-
# Shared library output location.
47-
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_PATH})
46+
# `CMAKE_LIBRARY_OUTPUT_DIRECTORY` is set by `build.sh` so that the
47+
# standalone `libascend_kernel.so` lands in `<BUILD_DIR>/output/`, inside the
48+
# same build directory that holds `lib/libno_workspace_kernel.a`.
4849

4950
# Host-side files.
5051
file(GLOB OP_SRCS
5152
${PROJECT_OP_SRC_BASE}/torch_binding.cpp
5253
${PROJECT_OP_SRC_BASE}/rms_norm/op_host/rms_norm.cpp
54+
${PROJECT_OP_SRC_BASE}/add_rms_norm/op_host/add_rms_norm.cpp
5355
)
5456

5557
# Shared library name — consumed by `kernel_custom.h` variants and by the
@@ -58,9 +60,29 @@ set(OP_PLUGIN_NAME ascend_kernel)
5860

5961
# Kernel-side files (device code compiled by the `AscendC` toolchain).
6062
ascendc_library(no_workspace_kernel STATIC
61-
${PROJECT_OP_SRC_BASE}/rms_norm/op_kernel/rms_norm.cpp
63+
rms_norm/op_kernel/rms_norm.cpp
64+
add_rms_norm/op_kernel/add_rms_norm.cpp
6265
)
6366

67+
# `CANN 8.5.1` installs host objects under `objects-<CONFIG>/`, while
68+
# `recompile_binary.py` only scans the host directory root.
69+
if(TARGET no_workspace_kernel_host)
70+
add_custom_command(TARGET no_workspace_kernel_host POST_BUILD
71+
COMMAND ${CMAKE_COMMAND}
72+
-DHOST_DIR=${CMAKE_BINARY_DIR}/no_workspace_kernel_host_dir
73+
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/flatten_host_objects.cmake
74+
VERBATIM)
75+
endif()
76+
77+
# The kernel translation units include `"data_type_enum.h"` from the main
78+
# project's `src/` so that launcher and device code share one `DataType`
79+
# enum. `ascendc_library` forwards the interface target's `INCLUDES`
80+
# property to the nested `ExternalProject_Add` (see
81+
# `${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake/legacy_modules/function.cmake`),
82+
# so append the main `src/` dir here.
83+
set_property(TARGET no_workspace_kernel_interface APPEND PROPERTY
84+
INCLUDES ${MAIN_SRC_DIR})
85+
6486
# Create the shared library `libascend_kernel.so`.
6587
add_library(${OP_PLUGIN_NAME} SHARED ${OP_SRCS})
6688

src/native/ascend/custom/build.sh

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
#!/bin/bash
2-
# Build custom `AscendC` kernels into `libascend_kernel.so`.
2+
# Build custom `AscendC` kernels into `libno_workspace_kernel.a` (+ the
3+
# standalone `libascend_kernel.so`).
4+
#
5+
# Intermediate artifacts default to `<repo>/build/build_ascend_custom/`
6+
# so the source tree under `src/` stays free of build output. Override
7+
# via `BUILD_DIR=<abs-path> bash build.sh <soc>` if needed.
38
set -e
49

510
SOC_VERSION="${1:-Ascend910_9382}"
@@ -10,20 +15,26 @@ source "${_CANN_TOOLKIT_INSTALL_PATH}/set_env.sh"
1015
echo "CANN: ${ASCEND_TOOLKIT_HOME}"
1116

1217
ASCEND_INCLUDE_DIR=${ASCEND_TOOLKIT_HOME}/$(arch)-linux/include
13-
CURRENT_DIR=$(pwd)
14-
OUTPUT_DIR=${CURRENT_DIR}/output
15-
mkdir -p "${OUTPUT_DIR}"
1618

17-
BUILD_DIR=build
19+
# Resolve build directory. The script lives in `src/native/ascend/custom/`,
# so `<script>/../../../..` is `<repo>/`.
# `SCRIPT_DIR` keeps the path the script was invoked through (it is passed to
# `cmake -S` below); `REAL_SCRIPT_DIR` resolves symlinks first so that the
# repository root is found even when the script is reached via a symlink.
20+
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
21+
REAL_SCRIPT_DIR=$(cd "$(dirname "$(readlink -f "$0")")" && pwd)
22+
REPO_ROOT=$(cd "${REAL_SCRIPT_DIR}/../../../.." && pwd)
23+
BUILD_DIR="${BUILD_DIR:-${REPO_ROOT}/build/build_ascend_custom}"
24+
OUTPUT_DIR="${BUILD_DIR}/output"
25+
MAIN_SRC_DIR="${MAIN_SRC_DIR:-${REPO_ROOT}/src}"
26+
1827
rm -rf "${BUILD_DIR}"
19-
mkdir -p "${BUILD_DIR}"
28+
mkdir -p "${BUILD_DIR}" "${OUTPUT_DIR}"
2029

2130
cmake \
2231
-DASCEND_HOME_PATH="${ASCEND_HOME_PATH}" \
2332
-DASCEND_INCLUDE_DIR="${ASCEND_INCLUDE_DIR}" \
2433
-DSOC_VERSION="${SOC_VERSION}" \
34+
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY="${OUTPUT_DIR}" \
35+
-DMAIN_SRC_DIR="${MAIN_SRC_DIR}" \
2536
-B "${BUILD_DIR}" \
26-
-S .
37+
-S "${SCRIPT_DIR}"
2738

2839
cmake --build "${BUILD_DIR}" -j 16
2940

src/native/ascend/custom/cmake/config_ascend.cmake

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,9 @@ set(ASCEND_CANN_PACKAGE_PATH ${ASCEND_HOME_PATH})
99
# Auto-detect `SOC_VERSION` from `npu-smi info` if not set externally.
1010
# Required by `CANN`'s `ascendc.cmake` for `AscendC` kernel compilation.
1111
if(NOT DEFINED SOC_VERSION OR "${SOC_VERSION}" STREQUAL "")
12-
execute_process(
13-
COMMAND bash -c "npu-smi info 2>/dev/null | awk '/910B|910A|310/ {for (i=1;i<=NF;i++) if ($i ~ /^(910|310)/) {print \"Ascend\" $i; exit}}'"
14-
OUTPUT_VARIABLE _DETECTED_SOC
15-
OUTPUT_STRIP_TRAILING_WHITESPACE)
16-
17-
if(_DETECTED_SOC)
18-
set(SOC_VERSION "${_DETECTED_SOC}" CACHE STRING "Ascend SOC version" FORCE)
19-
else()
20-
set(SOC_VERSION "Ascend910B4" CACHE STRING "Ascend SOC version" FORCE)
21-
endif()
22-
12+
include(${CMAKE_CURRENT_LIST_DIR}/detect_soc.cmake)
13+
infiniops_detect_soc(_detected_soc)
14+
set(SOC_VERSION "${_detected_soc}" CACHE STRING "Ascend SOC version" FORCE)
2315
message(STATUS "SOC_VERSION auto-set to ${SOC_VERSION}")
2416
endif()
2517

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Auto-detect the Ascend SOC version from `npu-smi info`.
2+
#
3+
# `infiniops_detect_soc(<out_var>)` scans `npu-smi info` for the first row
4+
# containing `910B`, `910A`, or `310`, takes that row's first field starting
# with `910` / `310`, and writes `Ascend<field>` into the named variable in
5+
# the caller's scope. Falls back to `Ascend910B4` when detection fails
6+
# (no NPU on the host, `npu-smi` missing, output format mismatch).
7+
#
8+
# Called from both `src/CMakeLists.txt` (outer `pip install` build, to
9+
# forward `SOC_VERSION` to the standalone `build.sh` invocation) and
10+
# `src/native/ascend/custom/cmake/config_ascend.cmake` (the sub-build driven
11+
# by that `build.sh`).
12+
13+
function(infiniops_detect_soc out_var)
14+
execute_process(
15+
COMMAND bash -c "npu-smi info 2>/dev/null | awk '/910B|910A|310/ {for (i=1;i<=NF;i++) if ($i ~ /^(910|310)/) {print \"Ascend\" $i; exit}}'"
16+
OUTPUT_VARIABLE _detected
17+
OUTPUT_STRIP_TRAILING_WHITESPACE)
18+
19+
if(_detected)
20+
set(${out_var} "${_detected}" PARENT_SCOPE)
21+
else()
22+
set(${out_var} "Ascend910B4" PARENT_SCOPE)
23+
endif()
24+
endfunction()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
if(NOT DEFINED HOST_DIR OR NOT IS_DIRECTORY "${HOST_DIR}")
2+
message(FATAL_ERROR "`HOST_DIR` must point to an existing host object directory")
3+
endif()
4+
5+
file(GLOB_RECURSE _host_objects "${HOST_DIR}/objects-*/*.o")
6+
7+
foreach(_obj IN LISTS _host_objects)
8+
get_filename_component(_obj_name "${_obj}" NAME)
9+
file(COPY_FILE "${_obj}" "${HOST_DIR}/${_obj_name}" ONLY_IF_DIFFERENT)
10+
endforeach()

0 commit comments

Comments
 (0)