Skip to content

Commit 449c9e0

Browse files
authored
Revert "feat(ascend): op-norm-rope group — Swiglu, SiluAndMul, CausalSoftmax,…" (#72)
This reverts commit 38a23cf.
1 parent 38a23cf commit 449c9e0

35 files changed

Lines changed: 438 additions & 3963 deletions

CMakeLists.txt

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,12 @@ option(WITH_ASCEND "Enable Ascend backend" OFF)
1818

1919
option(WITH_TORCH "Enable PyTorch C++ backend" OFF)
2020

21-
# Custom `AscendC` kernels under `src/ascend/custom/`. `ON` by default
22-
# so CI and routine dev builds always exercise `implementation_index=1/2`
23-
# for `RmsNorm` / `AddRmsNorm`. Gated by `WITH_ASCEND` in
24-
# `src/CMakeLists.txt` — non-Ascend builds ignore it. Pass
25-
# `-DBUILD_ASCEND_CUSTOM=OFF` to skip the `ccec` build on Ascend
26-
# machines where the custom kernels aren't needed.
27-
#
28-
# When `ON`, `src/CMakeLists.txt` drives the standalone
29-
# `src/ascend/custom/build.sh` via `execute_process` at configure time
30-
# (sidesteps a `CANN` `extract_host_stub.py` path bug that breaks
31-
# in-tree `ascendc_library()` under `scikit-build-core` temp-dir builds)
32-
# and links the produced `libno_workspace_kernel.a` into the `ops`
33-
# module with `--whole-archive`. Requires `torch_npu` and the
34-
# `AscendC` toolchain (`ccec`).
35-
option(BUILD_ASCEND_CUSTOM "Build custom AscendC kernels" ON)
21+
# Default OFF until CANN's `extract_host_stub.py` path handling is fixed for
22+
# `scikit-build-core` temp-dir builds (triggers `KeyError` on the preprocessed
23+
# object path). Enable explicitly with `-DBUILD_CUSTOM_KERNEL=ON` when the
24+
# toolchain is compatible or when building via the standalone
25+
# `src/ascend/custom/build.sh` script.
26+
option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requires `torch_npu`)" OFF)
3627

3728
option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF)
3829
option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF)

pyproject.toml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,6 @@ name = "InfiniOps"
77
version = "0.1.0"
88

99
[project.optional-dependencies]
10-
# TODO: `torch` here is unconstrained. On Ascend hosts, the working
11-
# torch is the Ascend-matched `torch 2.9.0+cpu` paired with
12-
# `torch_npu 2.9.0.post1+…`. A `pip install -e .[dev] --force-reinstall`
13-
# will re-resolve `torch` to the latest PyPI version (currently
14-
# `torch 2.11.0`), which now declares `cuda-toolkit` / `nvidia-cublas` /
15-
# `nvidia-cudnn` / … as hard deps — downloads GBs of CUDA wheels and
16-
# kills the `torch_npu` / `vllm-ascend` pairing. Needs a platform-aware
17-
# split (e.g. `torch; platform_machine != 'aarch64'`, or move `torch`
18-
# out of `dev` and require it pre-installed in the container image).
1910
dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch", "pyyaml"]
2011

2112
[tool.scikit-build.wheel]

scripts/generate_wrappers.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -112,29 +112,9 @@ def _find_vector_tensor_params(op_name):
112112
return set(re.findall(r"std::vector<Tensor>\s+(\w+)", source))
113113

114114

115-
def _find_params_with_defaults(op_name):
116-
"""Return ``{param_name: default_literal}`` for base-header params that
117-
carry a `= <literal>` default value. `libclang`'s cursor API does not
118-
expose defaults reliably, so we regex-scan the source. Only used for
119-
plain scalar defaults such as ``bool pre_gathered = false``.
120-
"""
121-
source = (_BASE_DIR / f"{op_name}.h").read_text()
122-
123-
mapping = {}
124-
125-
for name, default in re.findall(
126-
r"\b(?:bool|int(?:64_t|32_t|8_t|16_t)?|std::size_t|std::uint\w+_t|float|double)\s+(\w+)\s*=\s*([^,\)]+?)\s*(?:,|\))",
127-
source,
128-
):
129-
mapping[name] = default.strip()
130-
131-
return mapping
132-
133-
134115
def _generate_pybind11(operator):
135116
optional_tensor_params = _find_optional_tensor_params(operator.name)
136117
vector_tensor_params = _find_vector_tensor_params(operator.name)
137-
params_with_defaults = _find_params_with_defaults(operator.name)
138118

139119
def _is_optional_tensor(arg):
140120
if arg.spelling in optional_tensor_params:
@@ -206,10 +186,6 @@ def _generate_py_args(node):
206186

207187
if _is_optional(arg):
208188
parts.append(f'py::arg("{arg.spelling}") = py::none()')
209-
elif arg.spelling in params_with_defaults:
210-
parts.append(
211-
f'py::arg("{arg.spelling}") = {params_with_defaults[arg.spelling]}'
212-
)
213189
else:
214190
parts.append(f'py::arg("{arg.spelling}")')
215191

@@ -281,7 +257,8 @@ def _generate_call(op_name, call, method=True):
281257
}})
282258
.def_static("clear_cache", &Self::clear_cache);
283259
284-
{callers}}}
260+
{callers}
261+
}}
285262
286263
}} // namespace infini::ops
287264

src/CMakeLists.txt

Lines changed: 4 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -241,66 +241,8 @@ if(WITH_ASCEND)
241241
list(APPEND DEVICE_LIST "ascend")
242242

243243
# Custom `AscendC` kernels (PyTorch extension, requires `torch_npu`).
244-
if(BUILD_ASCEND_CUSTOM)
245-
# In-tree `ascendc_library()` trips the `CANN` `extract_host_stub.py`
246-
# path-handling bug under `scikit-build-core`'s temp-dir builds
247-
# (`KeyError` on `/./workspace/...` paths in `$<TARGET_OBJECTS>`).
248-
# Work around it by driving the standalone `src/ascend/custom/build.sh`
249-
# — that script invokes a separate `cmake` with
250-
# `src/ascend/custom/` as its `SOURCE_DIR`, avoiding the buggy
251-
# path shape. The produced `.a` is imported and linked into
252-
# `ops` with `--whole-archive`.
253-
set(_custom_build_dir "${CMAKE_SOURCE_DIR}/build/build_ascend_custom")
254-
set(_custom_lib "${_custom_build_dir}/lib/libno_workspace_kernel.a")
255-
256-
if(NOT DEFINED SOC_VERSION OR "${SOC_VERSION}" STREQUAL "")
257-
include(${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/cmake/detect_soc.cmake)
258-
infiniops_detect_soc(SOC_VERSION)
259-
endif()
260-
261-
# Drive `build.sh` as a build-phase target with explicit source
262-
# dependencies so that editing any `op_host/` or `op_kernel/`
263-
# source re-triggers the build (plain `execute_process` at
264-
# configure time would only gate on file existence and leave
265-
# stale `.a` files in place).
266-
file(GLOB_RECURSE _custom_srcs CONFIGURE_DEPENDS
267-
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/*.cpp"
268-
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/*.h"
269-
"${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/build.sh")
270-
271-
# Scrub env inherited from the outer `scikit-build-core` invocation
272-
# before handing control to `build.sh`:
273-
# * `CMAKE_GENERATOR` / `CMAKE_EXPORT_COMPILE_COMMANDS` leaking
274-
# into the inner `cmake` change the path format passed to
275-
# `ninja`'s `_host_cpp` rule and re-trigger the `CANN`
276-
# `extract_host_stub.py` `KeyError` (`/./workspace/...`) that
277-
# standalone `build.sh` avoids.
278-
# * `PYTHONPATH` from `pip`'s build-isolation overlay makes the
279-
# child `python3` skip the system `site-packages` — child
280-
# `cmake` modules that `import torch` (`config_envs.cmake`)
281-
# then fail with `ModuleNotFoundError` even though `torch` is
282-
# installed.
283-
add_custom_command(
284-
OUTPUT ${_custom_lib}
285-
COMMAND ${CMAKE_COMMAND} -E env
286-
--unset=CMAKE_GENERATOR
287-
--unset=CMAKE_EXPORT_COMPILE_COMMANDS
288-
--unset=CMAKE_BUILD_PARALLEL_LEVEL
289-
--unset=PYTHONPATH
290-
"BUILD_DIR=${_custom_build_dir}"
291-
"CMAKE_EXE=${CMAKE_COMMAND}"
292-
bash ${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom/build.sh ${SOC_VERSION}
293-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ascend/custom
294-
DEPENDS ${_custom_srcs}
295-
COMMENT "Building custom AscendC kernels (SOC_VERSION=${SOC_VERSION})"
296-
VERBATIM)
297-
298-
add_custom_target(no_workspace_kernel_build ALL DEPENDS ${_custom_lib})
299-
300-
add_library(no_workspace_kernel STATIC IMPORTED GLOBAL)
301-
set_target_properties(no_workspace_kernel PROPERTIES
302-
IMPORTED_LOCATION "${_custom_lib}")
303-
add_dependencies(no_workspace_kernel no_workspace_kernel_build)
244+
if(BUILD_CUSTOM_KERNEL)
245+
add_subdirectory(ascend/custom)
304246

305247
# Link the compiled `AscendC` kernel objects into `infiniops` so that
306248
# custom kernel implementations (e.g. `RmsNorm` index 1) can call
@@ -437,13 +379,9 @@ if(GENERATE_PYTHON_BINDINGS)
437379
# The `Operator<..., 1>` template instantiations that call
438380
# `aclrtlaunch_*` live in `ops.cc`, so link here with
439381
# `--whole-archive` to ensure all launch functions are available.
440-
# `$<TARGET_FILE>` works for both real `ascendc_library()` targets and
441-
# `IMPORTED` targets pointing at a pre-built `.a`.
442-
if(BUILD_ASCEND_CUSTOM)
382+
if(BUILD_CUSTOM_KERNEL)
443383
target_link_libraries(ops PRIVATE
444-
-Wl,--whole-archive $<TARGET_FILE:no_workspace_kernel> -Wl,--no-whole-archive)
445-
# `ops` link step must wait for `build.sh` to produce the `.a`.
446-
add_dependencies(ops no_workspace_kernel_build)
384+
-Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive)
447385
endif()
448386

449387
set_target_properties(infiniops PROPERTIES INSTALL_RPATH "$ORIGIN")

src/ascend/add_rms_norm/kernel.h

Lines changed: 0 additions & 144 deletions
This file was deleted.

0 commit comments

Comments
 (0)