From 52d1d6dd056e0c6123e71f3056c1037a31bed0c9 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 28 May 2026 16:00:17 +0800 Subject: [PATCH 1/2] revert: remove public functional operator API --- CMakeLists.txt | 14 +- cmake/InfiniOpsConfig.cmake.in | 3 - cmake/infiniops.pc.in | 10 -- include/infini/ops.h | 8 - scripts/generate_wrappers.py | 288 +-------------------------------- src/CMakeLists.txt | 179 ++------------------ tests/test_cpp_api.py | 101 ------------ 7 files changed, 17 insertions(+), 586 deletions(-) delete mode 100644 cmake/InfiniOpsConfig.cmake.in delete mode 100644 cmake/infiniops.pc.in delete mode 100644 include/infini/ops.h delete mode 100644 tests/test_cpp_api.py diff --git a/CMakeLists.txt b/CMakeLists.txt index fe478691b..b3eb82a6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.18) -project(InfiniOps VERSION 0.1.0 LANGUAGES CXX) - -include(GNUInstallDirs) +project(InfiniOps LANGUAGES CXX) if(POLICY CMP0116) cmake_policy(SET CMP0116 NEW) @@ -36,7 +34,6 @@ option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requi option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) -option(GENERATE_CPP_OPERATOR_API "Generate public C++ operator API" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") @@ -322,13 +319,10 @@ if(WITH_ILUVATAR) if(NOT ILUVATAR_CUDA_COMPILER) message(FATAL_ERROR "`WITH_ILUVATAR` is `ON` but CoreX `clang++` was not found.") endif() - get_filename_component(ILUVATAR_CUDA_BIN_DIR "${ILUVATAR_CUDA_COMPILER}" DIRECTORY) - get_filename_component(ILUVATAR_CUDA_ROOT "${ILUVATAR_CUDA_BIN_DIR}/.." ABSOLUTE) - set(CUDAToolkit_ROOT "${ILUVATAR_CUDA_ROOT}" CACHE PATH "Iluvatar CoreX toolkit root") set(ILUVATAR_CUDA_FLAGS - "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=${ILUVATAR_CUDA_ROOT};-x;ivcore" + "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=/usr/local/corex;-x;ivcore" CACHE STRING "Iluvatar CUDA compiler flags") - message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}, toolkit ${ILUVATAR_CUDA_ROOT}") + message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}") find_package(CUDAToolkit REQUIRED) endif() @@ -446,7 +440,6 @@ if(WITH_MOORE) find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED) - find_library(MUSA_OPENMP_LIB NAMES omp iomp5 HINTS "${MUSA_ROOT}/lib" REQUIRED) endif() if(WITH_CAMBRICON) @@ -481,7 +474,6 @@ endif() # If all other platforms are not enabled, CPU is enabled by default. if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) - set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) add_compile_definitions(WITH_CPU=1) endif() diff --git a/cmake/InfiniOpsConfig.cmake.in b/cmake/InfiniOpsConfig.cmake.in deleted file mode 100644 index af1f50794..000000000 --- a/cmake/InfiniOpsConfig.cmake.in +++ /dev/null @@ -1,3 +0,0 @@ -@PACKAGE_INIT@ - -include("${CMAKE_CURRENT_LIST_DIR}/InfiniOpsTargets.cmake") diff --git a/cmake/infiniops.pc.in b/cmake/infiniops.pc.in deleted file mode 100644 index 09b544ef0..000000000 --- a/cmake/infiniops.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - -Name: InfiniOps -Description: InfiniOps operator library -Version: @PROJECT_VERSION@ -Libs: -L${libdir} -linfiniops -Cflags: -I${includedir} diff --git a/include/infini/ops.h b/include/infini/ops.h deleted file mode 100644 index db17bd335..000000000 --- a/include/infini/ops.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef INFINI_OPS_H_ -#define INFINI_OPS_H_ - -#ifdef __cplusplus -#include -#endif - -#endif // INFINI_OPS_H_ diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index b15041baa..0302dd55d 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -9,12 +9,8 @@ import subprocess import textwrap -try: - import clang.cindex - from clang.cindex import CursorKind -except ImportError: - clang = None - CursorKind = None +import clang.cindex +from clang.cindex import CursorKind _SRC_DIR = pathlib.Path("src") @@ -33,8 +29,6 @@ _INCLUDE_DIR = _GENERATION_DIR / "include" -_PUBLIC_INCLUDE_DIR = _INCLUDE_DIR / "infini" - _INDENTATION = " " @@ -80,30 +74,8 @@ def _find_base_header(op_name): raise FileNotFoundError(f"no base header for op {op_name!r}") -class _ParsedType: - def __init__(self, spelling): - self.spelling = spelling - - -class _ParsedArgument: - def __init__(self, type_spelling, spelling): - self.type = _ParsedType(type_spelling) - self.spelling = spelling - - -class _ParsedFunction: - def __init__(self, arguments): - self._arguments = arguments - - def get_arguments(self): - return self._arguments - - class _OperatorExtractor: def __call__(self, op_name): - if clang is None: - return _parse_operator_header(op_name) - index = clang.cindex.Index.create() args = ( "-std=c++17", @@ -143,131 +115,6 @@ def _find(node, op_name): yield from _OperatorExtractor._find(child, op_name) -def _parse_operator_header(op_name): - pascal_case_op_name = _snake_to_pascal(op_name) - source = _strip_cpp_comments(_find_base_header(op_name).read_text()) - class_body = _extract_class_body(source, pascal_case_op_name) - constructors = [ - _ParsedFunction(_parse_parameter_list(params)) - for params in _find_signature_parameters( - class_body, rf"(?:explicit\s+)?{pascal_case_op_name}\s*\(" - ) - ] - calls = [ - _ParsedFunction(_parse_parameter_list(params)) - for params in _find_signature_parameters( - class_body, r"(?:virtual\s+)?void\s+operator\s*\(\s*\)\s*\(" - ) - ] - - return _Operator(op_name, constructors, calls) - - -def _strip_cpp_comments(source): - source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL) - return re.sub(r"//.*", "", source) - - -def _extract_class_body(source, class_name): - match = re.search(rf"\bclass\s+{class_name}\b[^{{]*{{", source) - - if match is None: - raise ValueError(f"no class definition for {class_name!r}") - - start = match.end() - depth = 1 - index = start - - while index < len(source): - char = source[index] - - if char == "{": - depth += 1 - elif char == "}": - depth -= 1 - if depth == 0: - return source[start:index] - - index += 1 - - raise ValueError(f"unterminated class definition for {class_name!r}") - - -def _find_signature_parameters(source, pattern): - params = [] - - for match in re.finditer(pattern, source): - opening_paren = match.end() - 1 - - if opening_paren < 0 or source[opening_paren] != "(": - continue - - closing_paren = _find_matching_delimiter(source, opening_paren, "(", ")") - params.append(source[opening_paren + 1 : closing_paren]) - - return params - - -def _find_matching_delimiter(source, start, opening, closing): - depth = 0 - - for index in range(start, len(source)): - char = source[index] - - if char == opening: - depth += 1 - elif char == closing: - depth -= 1 - if depth == 0: - return index - - raise ValueError(f"unmatched delimiter {opening!r}") - - -def _parse_parameter_list(params): - arguments = [] - - for param in _split_top_level(params, ","): - param = _strip_default_argument(param.strip()) - - if not param or param == "void": - continue - - match = re.match(r"(.+?[\s*&]+)([A-Za-z_][A-Za-z0-9_]*)$", param) - - if match is None: - raise ValueError(f"could not parse parameter {param!r}") - - arguments.append(_ParsedArgument(match.group(1).strip(), match.group(2))) - - return arguments - - -def _split_top_level(text, delimiter): - parts = [] - start = 0 - depth = 0 - pairs = {"<": ">", "(": ")", "[": "]", "{": "}"} - closing = {value: key for key, value in pairs.items()} - - for index, char in enumerate(text): - if char in pairs: - depth += 1 - elif char in closing: - depth -= 1 - elif char == delimiter and depth == 0: - parts.append(text[start:index]) - start = index + 1 - - parts.append(text[start:]) - return parts - - -def _strip_default_argument(param): - parts = _split_top_level(param, "=") - return parts[0].strip() - - class _Operator: def __init__(self, name, constructors, calls): self.name = name @@ -421,7 +268,7 @@ def _generate_call(op_name, call, method=True): f" }}\n" f" Config config;\n" f" config.set_implementation_index(implementation_index);\n" - f" return functional::{pascal_case_op_name}(handle, config, {call_args});\n" + f" return generated_dispatch::Call{pascal_case_op_name}(handle, config, {call_args});\n" f' }}, {py_args_str}py::kw_only(), py::arg("stream") = 0, py::arg("implementation_index") = 0);' ) @@ -481,7 +328,6 @@ def _overload_order_key(node): #include "base/{op_name}.h" #include "config.h" -#include "infini/ops.h" #include "generated/bindings/generated_dispatch.h" #include "handle.h" #include "pybind11_utils.h" @@ -804,54 +650,6 @@ def _append_optional_params(prefix, params): return declarations, definitions -def _generate_functional_entries(operator): - def _generate_params(node): - return ", ".join( - f"{arg.type.spelling} {arg.spelling}" - for arg in node.get_arguments() - if arg.spelling != "stream" - ) - - def _generate_arguments(node): - return ", ".join( - arg.spelling for arg in node.get_arguments() if arg.spelling != "stream" - ) - - def _append_optional_args(prefix, args): - if args: - return f"{prefix}, {args}" - - return prefix - - def _append_optional_params(prefix, params): - if params: - return f"{prefix}, {params}" - - return prefix - - pascal_case_op_name = _snake_to_pascal(operator.name) - op_type = f"::infini::ops::{pascal_case_op_name}" - operator_type = f"::infini::ops::Operator<{op_type}>" - declarations = [] - definitions = [] - - for call in operator.calls: - params = _generate_params(call) - args = _generate_arguments(call) - function_params = _append_optional_params( - "const Handle& handle, const Config& config", params - ) - - declarations.append(f"void {pascal_case_op_name}({function_params});") - definitions.append( - f"""void {pascal_case_op_name}({function_params}) {{ - return {operator_type}::Call({_append_optional_args("handle, config", args)}); -}}""" - ) - - return declarations, definitions - - def _generate_generated_dispatch_header(op_names, devices, declarations): header_base_includes = "\n".join( f'#include "base/{op_name}.h"' for op_name in op_names @@ -904,56 +702,6 @@ def _generate_generated_dispatch_source(impl_paths, definitions): """ -def _generate_functional_header(declarations): - return f"""#ifndef INFINI_OPS_FUNCTIONAL_OPS_H_ -#define INFINI_OPS_FUNCTIONAL_OPS_H_ - -#include -#include -#include -#include - -#include "config.h" -#include "data_type.h" -#include "device.h" -#include "handle.h" -#include "tensor.h" - -namespace infini::ops::functional {{ - -{chr(10).join(declarations)} - -}} // namespace infini::ops::functional - -#endif -""" - - -def _generate_functional_source(op_names, devices, impl_paths, definitions): - base_includes = "\n".join(f'#include "base/{op_name}.h"' for op_name in op_names) - device_includes = "\n".join( - f'#include "{path}"' for path in _device_marker_headers(devices) - ) - impl_includes = "\n".join( - f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths - ) - - return f"""#include "infini/functional_ops.h" - -// clang-format off -{device_includes} -{base_includes} -{impl_includes} -// clang-format on - -namespace infini::ops::functional {{ - -{chr(10).join(definitions)} - -}} // namespace infini::ops::functional -""" - - def _device_marker_headers(devices): paths = { "cpu": "native/cpu/device_.h", @@ -1073,9 +821,6 @@ def _generate_op_artifacts(item): dispatch_declarations, dispatch_definitions = _generate_generated_dispatch_entries( operator ) - functional_declarations, functional_definitions = _generate_functional_entries( - operator - ) return { "op_name": op_name, @@ -1087,8 +832,6 @@ def _generate_op_artifacts(item): "legacy_c_header": legacy_c_header, "dispatch_declarations": dispatch_declarations, "dispatch_definitions": dispatch_definitions, - "functional_declarations": functional_declarations, - "functional_definitions": functional_definitions, "impl_paths": impl_paths, } @@ -1159,8 +902,6 @@ def _dispatch_gen_batch_size(): directory.mkdir(parents=True) - _PUBLIC_INCLUDE_DIR.mkdir(parents=True, exist_ok=True) - ops_json = pathlib.Path("ops.json") if ops_json.exists(): @@ -1188,11 +929,6 @@ def _dispatch_gen_batch_size(): for artifact in artifacts for declaration in artifact["dispatch_declarations"] ] - functional_declarations = [ - declaration - for artifact in artifacts - for declaration in artifact["functional_declarations"] - ] use_monolithic_bindings = _use_monolithic_bindings() op_includes = [] @@ -1222,9 +958,6 @@ def _dispatch_gen_batch_size(): ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) - functional_header = _generate_functional_header(functional_declarations) - (_PUBLIC_INCLUDE_DIR / "functional_ops.h").write_text(functional_header) - dispatch_batch_size = _dispatch_gen_batch_size() for dispatch_batch_index, start in enumerate( @@ -1246,21 +979,6 @@ def _dispatch_gen_batch_size(): dispatch_source ) - functional_definitions = [ - definition - for artifact in batch - for definition in artifact["functional_definitions"] - ] - functional_source = _generate_functional_source( - [artifact["op_name"] for artifact in batch], - args.devices, - impl_paths, - functional_definitions, - ) - (_GENERATED_SRC_DIR / f"functional_ops_{dispatch_batch_index}.cc").write_text( - functional_source - ) - bind_func_calls = "\n".join( f"{bind_func_name}(m);" for bind_func_name in bind_func_names ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3016e5eab..39505b744 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -175,11 +175,7 @@ if(WITH_MOORE) target_sources(infiniops PRIVATE ${MOORE_SOURCES}) target_include_directories(infiniops PUBLIC "${MUSA_ROOT}/include") - target_link_libraries(infiniops PUBLIC - ${MUSA_LIB} - ${MUSART_LIB} - ${MUBLAS_LIB} - ${MUSA_OPENMP_LIB}) + target_link_libraries(infiniops PUBLIC ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB}) list(APPEND DEVICE_LIST "moore") endif() @@ -211,15 +207,10 @@ if(WITH_CAMBRICON) endforeach() get_directory_property(CAMBRICON_OBJECT_FILES CAMBRICON_OBJECTS) if(CAMBRICON_OBJECT_FILES) - set_source_files_properties(${CAMBRICON_OBJECT_FILES} - PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) target_sources(infiniops PRIVATE ${CAMBRICON_OBJECT_FILES}) endif() else() - if(CAMBRICON_MLU_SOURCES) - message(FATAL_ERROR - "cncc compiler not found. Cambricon .mlu kernels cannot be compiled.") - endif() + message(WARNING "cncc compiler not found. MLU kernels will not be compiled.") endif() target_compile_definitions(infiniops PRIVATE WITH_CAMBRICON=1) @@ -269,7 +260,6 @@ if(WITH_ASCEND) else() message(FATAL_ERROR "libascend_hal.so not found (tried ${ASCEND_HAL_REAL}, ${ASCEND_HAL_STUB}, and ${ASCEND_HAL_DEVLIB})") endif() - get_filename_component(ASCEND_HAL_DIR "${ASCEND_HAL_LIB}" DIRECTORY) target_include_directories(infiniops PUBLIC "${ASCEND_HOME}/include" @@ -360,7 +350,7 @@ if(WITH_TORCH) endif() message(STATUS "Generating torch op wrappers - done") - file(GLOB_RECURSE TORCH_SOURCES + file(GLOB_RECURSE TORCH_SOURCES CONFIGURE_DEPENDS "torch/*.cc" "torch/*.cpp" "${PROJECT_SOURCE_DIR}/generated/torch/*.cc" "${PROJECT_SOURCE_DIR}/generated/torch/*.cpp" @@ -406,7 +396,7 @@ if(WITH_TORCH) target_link_libraries(infiniops PUBLIC ${TORCH_LIBRARIES}) target_include_directories(infiniops PUBLIC ${TORCH_INCLUDE_DIRS} - $ + ${PROJECT_SOURCE_DIR}/generated ) # Each generated `.cc` instantiates `at::_out(...)`, which @@ -510,20 +500,14 @@ if(WITH_TORCH) endif() endif() -target_include_directories(infiniops - PUBLIC - $ - $ - $ - $ -) +target_include_directories(infiniops PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) +if(GENERATE_PYTHON_BINDINGS) find_package(Python COMPONENTS Interpreter REQUIRED) - # Always regenerate wrappers so the generated functional API and pybind11 - # dispatch code match the active device list. Stale generated files (e.g., - # committed for one platform) would omit specializations for other enabled - # backends, causing link-time or runtime failures. + # Always regenerate bindings so the included kernel headers match the + # active device list. Stale generated files (e.g., committed for one + # platform) would omit specializations for other enabled backends, + # causing link-time or runtime failures. set(GENERATOR_ARGS --devices ${DEVICE_LIST}) if(WITH_TORCH) @@ -545,74 +529,7 @@ if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) message(STATUS "Generating wrappers - done") endif() - file(GLOB_RECURSE FUNCTIONAL_API_SOURCES - "${PROJECT_SOURCE_DIR}/generated/src/functional_ops_*.cc") - - if(WITH_NVIDIA) - set_source_files_properties(${FUNCTIONAL_API_SOURCES} - PROPERTIES LANGUAGE CUDA) - target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) - elseif(WITH_ILUVATAR) - set(_iluvatar_functional_include_flags - "-I${CMAKE_CURRENT_SOURCE_DIR}" - "-I${PROJECT_SOURCE_DIR}" - "-I${PROJECT_SOURCE_DIR}/generated" - "-I${PROJECT_SOURCE_DIR}/generated/include") - foreach(_dir IN LISTS TORCH_INCLUDE_DIRS CUDAToolkit_INCLUDE_DIRS) - list(APPEND _iluvatar_functional_include_flags "-I${_dir}") - endforeach() - - set(_iluvatar_functional_defs -DWITH_ILUVATAR=1) - if(WITH_CPU) - list(APPEND _iluvatar_functional_defs -DWITH_CPU=1) - endif() - if(WITH_TORCH) - list(APPEND _iluvatar_functional_defs -DWITH_TORCH=1) - endif() - if(DEFINED TORCH_CXX11_ABI) - list(APPEND _iluvatar_functional_defs - "-D_GLIBCXX_USE_CXX11_ABI=${TORCH_CXX11_ABI}") - endif() - - set(ILUVATAR_FUNCTIONAL_OBJECTS) - set(_iluvatar_functional_object_dir - "${CMAKE_CURRENT_BINARY_DIR}/iluvatar_functional_objs") - foreach(_src IN LISTS FUNCTIONAL_API_SOURCES) - get_filename_component(_name "${_src}" NAME_WE) - set(_obj "${_iluvatar_functional_object_dir}/${_name}.o") - set(_dep "${_obj}.d") - set(_depfile_arg) - if(CMAKE_GENERATOR MATCHES "Ninja") - set(_depfile_arg DEPFILE "${_dep}") - endif() - add_custom_command( - OUTPUT "${_obj}" - COMMAND ${CMAKE_COMMAND} -E make_directory - "${_iluvatar_functional_object_dir}" - COMMAND ${ILUVATAR_CUDA_COMPILER} - ${_iluvatar_functional_defs} - ${_iluvatar_functional_include_flags} - ${ILUVATAR_CUDA_FLAGS} - -MMD -MF "${_dep}" - -c "${_src}" -o "${_obj}" - DEPENDS "${_src}" - ${_depfile_arg} - COMMENT "Compiling ${_name}.cc with CoreX clang++" - VERBATIM - ) - list(APPEND ILUVATAR_FUNCTIONAL_OBJECTS "${_obj}") - endforeach() - - set_source_files_properties(${ILUVATAR_FUNCTIONAL_OBJECTS} - PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) - target_sources(infiniops PRIVATE ${ILUVATAR_FUNCTIONAL_OBJECTS}) - else() - target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) - endif() -endif() - -if(GENERATE_PYTHON_BINDINGS) - file(GLOB_RECURSE PYBIND11_SOURCES + file(GLOB_RECURSE PYBIND11_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/generated/bindings/*.cc") set(PYBIND11_DISPATCH_SOURCES) @@ -794,12 +711,6 @@ if(GENERATE_PYTHON_BINDINGS) if(WITH_TORCH) list(APPEND _INFINIOPS_INSTALL_RPATH ${TORCH_RUNTIME_DIRS}) endif() - if(WITH_MOORE) - list(APPEND _INFINIOPS_INSTALL_RPATH "${MUSA_ROOT}/lib") - endif() - if(WITH_ASCEND) - list(APPEND _INFINIOPS_INSTALL_RPATH "${ASCEND_HOME}/lib64" "${ASCEND_HAL_DIR}") - endif() set_target_properties(infiniops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") set_target_properties(ops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") @@ -816,71 +727,3 @@ if(GENERATE_PYTHON_BINDINGS) DESTINATION .) endif() endif() - -include(CMakePackageConfigHelpers) - -configure_file( - ${PROJECT_SOURCE_DIR}/cmake/infiniops.pc.in - ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc - @ONLY -) - -configure_package_config_file( - ${PROJECT_SOURCE_DIR}/cmake/InfiniOpsConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps -) - -write_basic_package_version_file( - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake - VERSION ${PROJECT_VERSION} - COMPATIBILITY SameMajorVersion -) - -install(TARGETS infiniops - EXPORT InfiniOpsTargets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) - -install(FILES ${PROJECT_SOURCE_DIR}/include/infini/ops.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini -) - -if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) - install(FILES ${PROJECT_SOURCE_DIR}/generated/include/infini/functional_ops.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini - ) -endif() - -install(FILES - ${PROJECT_SOURCE_DIR}/src/config.h - ${PROJECT_SOURCE_DIR}/src/data_type.h - ${PROJECT_SOURCE_DIR}/src/device.h - ${PROJECT_SOURCE_DIR}/src/handle.h - ${PROJECT_SOURCE_DIR}/src/hash.h - ${PROJECT_SOURCE_DIR}/src/tensor.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -install(FILES - ${PROJECT_SOURCE_DIR}/src/common/constexpr_map.h - ${PROJECT_SOURCE_DIR}/src/common/traits.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common -) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig -) - -install(EXPORT InfiniOpsTargets - NAMESPACE InfiniOps:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps -) - -install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps -) diff --git a/tests/test_cpp_api.py b/tests/test_cpp_api.py deleted file mode 100644 index 62eb8969d..000000000 --- a/tests/test_cpp_api.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import subprocess -import textwrap -from pathlib import Path - -import pytest - - -def test_cpp_functional_add_smoke(tmp_path): - install_prefix = _install_prefix() - include_dir = install_prefix / "include" - library_dir = _library_dir(install_prefix) - source = tmp_path / "add_smoke.cc" - binary = tmp_path / "add_smoke" - source.write_text(_ADD_SMOKE_SOURCE) - - _run( - [ - _compiler("CXX", "c++"), - "-std=c++17", - "-Werror", - f"-I{include_dir}", - str(source), - f"-L{library_dir}", - "-linfiniops", - f"-Wl,-rpath,{library_dir}", - "-o", - str(binary), - ] - ) - _run([str(binary)]) - - -def _install_prefix(): - prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") - - if prefix: - return Path(prefix) - - pytest.skip("`INFINIOPS_INSTALL_PREFIX` is not set.") - - -def _library_dir(prefix): - for name in ("lib", "lib64"): - library_dir = prefix / name - if (library_dir / "libinfiniops.so").exists(): - return library_dir - - pytest.skip(f"`libinfiniops.so` was not found under `{prefix}`.") - - -def _compiler(env_name, default): - compiler = os.environ.get(env_name, default) - - if not compiler: - pytest.skip(f"`{env_name}` is not configured.") - - return compiler - - -def _run(command): - try: - subprocess.run(command, check=True, text=True, capture_output=True) - except FileNotFoundError as error: - pytest.skip(f"`{command[0]}` is not available: {error}") - except subprocess.CalledProcessError as error: - output = "\n".join((error.stdout, error.stderr)).strip() - raise AssertionError(output) from error - - -_ADD_SMOKE_SOURCE = textwrap.dedent( - r""" - #include - - #include - - int main() { - float input_data[3] = {1.0f, 2.0f, 3.0f}; - float other_data[3] = {4.0f, 5.0f, 6.0f}; - float output_data[3] = {0.0f, 0.0f, 0.0f}; - - const infini::ops::Tensor::Shape shape{3}; - const infini::ops::Device device{infini::ops::Device::Type::kCpu}; - const infini::ops::DataType data_type{infini::ops::DataType::kFloat32}; - - infini::ops::Tensor input(input_data, shape, data_type, device); - infini::ops::Tensor other(other_data, shape, data_type, device); - infini::ops::Tensor output(output_data, shape, data_type, device); - infini::ops::Handle handle; - infini::ops::Config config; - - infini::ops::functional::Add(handle, config, input, other, output); - - if (output_data[0] != 5.0f || output_data[1] != 7.0f || - output_data[2] != 9.0f) { - return 1; - } - return 0; - } - """ -).lstrip() From e870e3e74b196226ec80cd8ff9f843f838a70f39 Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Thu, 28 May 2026 16:32:45 +0800 Subject: [PATCH 2/2] fix: export operator call instantiations --- CMakeLists.txt | 2 + include/infini/ops.h | 8 ++ scripts/generate_wrappers.py | 144 +++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 130 +++++++++++++++++++++++++++++-- src/operator.h | 20 +++-- tests/test_cpp_api.py | 115 ++++++++++++++++++++++++++++ 6 files changed, 402 insertions(+), 17 deletions(-) create mode 100644 include/infini/ops.h create mode 100644 tests/test_cpp_api.py diff --git a/CMakeLists.txt b/CMakeLists.txt index b3eb82a6f..4cdc54878 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,8 @@ option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requi option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) +option(GENERATE_OPERATOR_CALL_INSTANTIATIONS + "Generate explicit operator call instantiations" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") diff --git a/include/infini/ops.h b/include/infini/ops.h new file mode 100644 index 000000000..ed8181282 --- /dev/null +++ b/include/infini/ops.h @@ -0,0 +1,8 @@ +#ifndef INFINI_OPS_H_ +#define INFINI_OPS_H_ + +#ifdef __cplusplus +#include +#endif + +#endif // INFINI_OPS_H_ diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index 0302dd55d..f5734b7ad 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -702,10 +702,113 @@ def _generate_generated_dispatch_source(impl_paths, definitions): """ +def _strip_top_level_const(type_spelling): + type_spelling = " ".join(type_spelling.split()) + + while type_spelling.startswith("const "): + type_spelling = type_spelling[len("const ") :] + + return type_spelling + + +def _generate_operator_call_instantiation_entries(operator): + def _generate_template_arguments(node): + return ", ".join( + _strip_top_level_const(arg.type.spelling) + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _generate_parameters(node): + return ", ".join( + f"const {_strip_top_level_const(arg.type.spelling)}& {arg.spelling}" + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _append_optional_params(prefix, params): + if params: + return f"{prefix}, {params}" + + return prefix + + pascal_case_op_name = _snake_to_pascal(operator.name) + declarations = [] + definitions = [] + + for call in operator.calls: + template_arguments = _generate_template_arguments(call) + params = _generate_parameters(call) + function_params = _append_optional_params( + "const Handle& handle, const Config& config", params + ) + instantiation = ( + f"Operator<{pascal_case_op_name}>::Call<{template_arguments}>" + f"({function_params})" + ) + + declarations.append(f"extern template auto {instantiation};") + definitions.append(f"template auto {instantiation};") + + return declarations, definitions + + +def _generate_operator_call_instantiation_header(op_names, declarations): + header_base_includes = "\n".join( + f'#include "base/{op_name}.h"' for op_name in op_names + ) + + return f"""#ifndef INFINI_OPS_OPERATOR_CALL_INSTANTIATIONS_H_ +#define INFINI_OPS_OPERATOR_CALL_INSTANTIATIONS_H_ + +#include +#include +#include + +#include "config.h" +#include "handle.h" +#include "operator.h" + +{header_base_includes} + +namespace infini::ops {{ + +{chr(10).join(declarations)} + +}} // namespace infini::ops + +#endif +""" + + +def _generate_operator_call_instantiation_source(devices, impl_paths, definitions): + device_includes = "\n".join( + f'#include "{path}"' for path in _device_marker_headers(devices) + ) + impl_includes = "\n".join( + f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths + ) + + return f"""#include "infini/operator_call_instantiations.h" + +// clang-format off +{device_includes} +{impl_includes} +// clang-format on + +namespace infini::ops {{ + +{chr(10).join(definitions)} + +}} // namespace infini::ops +""" + + def _device_marker_headers(devices): paths = { "cpu": "native/cpu/device_.h", "nvidia": "native/cuda/nvidia/device_.h", + "hygon": "native/cuda/hygon/device_.h", "cambricon": "native/cambricon/device_.h", "ascend": "native/ascend/device_.h", "metax": "native/cuda/metax/device_.h", @@ -821,6 +924,10 @@ def _generate_op_artifacts(item): dispatch_declarations, dispatch_definitions = _generate_generated_dispatch_entries( operator ) + ( + call_instantiation_declarations, + call_instantiation_definitions, + ) = _generate_operator_call_instantiation_entries(operator) return { "op_name": op_name, @@ -832,6 +939,8 @@ def _generate_op_artifacts(item): "legacy_c_header": legacy_c_header, "dispatch_declarations": dispatch_declarations, "dispatch_definitions": dispatch_definitions, + "call_instantiation_declarations": call_instantiation_declarations, + "call_instantiation_definitions": call_instantiation_definitions, "impl_paths": impl_paths, } @@ -929,6 +1038,11 @@ def _dispatch_gen_batch_size(): for artifact in artifacts for declaration in artifact["dispatch_declarations"] ] + call_instantiation_declarations = [ + declaration + for artifact in artifacts + for declaration in artifact["call_instantiation_declarations"] + ] use_monolithic_bindings = _use_monolithic_bindings() op_includes = [] @@ -958,6 +1072,14 @@ def _dispatch_gen_batch_size(): ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) + call_instantiation_header = _generate_operator_call_instantiation_header( + op_names, call_instantiation_declarations + ) + (_INCLUDE_DIR / "infini").mkdir(exist_ok=True) + (_INCLUDE_DIR / "infini" / "operator_call_instantiations.h").write_text( + call_instantiation_header + ) + dispatch_batch_size = _dispatch_gen_batch_size() for dispatch_batch_index, start in enumerate( @@ -979,6 +1101,28 @@ def _dispatch_gen_batch_size(): dispatch_source ) + for call_instantiation_batch_index, start in enumerate( + range(0, len(artifacts), dispatch_batch_size) + ): + batch = artifacts[start : start + dispatch_batch_size] + impl_paths = list( + dict.fromkeys( + impl_path for artifact in batch for impl_path in artifact["impl_paths"] + ) + ) + definitions = [ + definition + for artifact in batch + for definition in artifact["call_instantiation_definitions"] + ] + call_instantiation_source = _generate_operator_call_instantiation_source( + args.devices, impl_paths, definitions + ) + ( + _GENERATED_SRC_DIR + / f"operator_call_instantiations_{call_instantiation_batch_index}.cc" + ).write_text(call_instantiation_source) + bind_func_calls = "\n".join( f"{bind_func_name}(m);" for bind_func_name in bind_func_names ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 39505b744..4b0ca3028 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(infiniops SHARED) +include(GNUInstallDirs) + file(GLOB BASE_SRCS CONFIGURE_DEPENDS "*.cc") target_sources(infiniops PRIVATE ${BASE_SRCS}) @@ -500,14 +502,20 @@ if(WITH_TORCH) endif() endif() -target_include_directories(infiniops PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories(infiniops + PUBLIC + $ + $ + $ + $ +) -if(GENERATE_PYTHON_BINDINGS) +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS OR GENERATE_PYTHON_BINDINGS) find_package(Python COMPONENTS Interpreter REQUIRED) - # Always regenerate bindings so the included kernel headers match the - # active device list. Stale generated files (e.g., committed for one - # platform) would omit specializations for other enabled backends, - # causing link-time or runtime failures. + # Always regenerate wrappers so emitted call instantiations and bindings + # match the active device list. Stale generated files would omit + # specializations for enabled backends, causing link-time or runtime + # failures. set(GENERATOR_ARGS --devices ${DEVICE_LIST}) if(WITH_TORCH) @@ -528,7 +536,76 @@ if(GENERATE_PYTHON_BINDINGS) else() message(STATUS "Generating wrappers - done") endif() +endif() + +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS) + file(GLOB_RECURSE OPERATOR_CALL_INSTANTIATION_SOURCES CONFIGURE_DEPENDS + "${PROJECT_SOURCE_DIR}/generated/src/operator_call_instantiations_*.cc") + + if(WITH_NVIDIA OR WITH_HYGON) + set_source_files_properties(${OPERATOR_CALL_INSTANTIATION_SOURCES} + PROPERTIES LANGUAGE CUDA) + target_sources(infiniops PRIVATE ${OPERATOR_CALL_INSTANTIATION_SOURCES}) + elseif(WITH_ILUVATAR) + set(_iluvatar_call_instantiation_include_flags + "-I${CMAKE_CURRENT_SOURCE_DIR}" + "-I${PROJECT_SOURCE_DIR}" + "-I${PROJECT_SOURCE_DIR}/generated" + "-I${PROJECT_SOURCE_DIR}/generated/include") + foreach(_dir IN LISTS TORCH_INCLUDE_DIRS CUDAToolkit_INCLUDE_DIRS) + list(APPEND _iluvatar_call_instantiation_include_flags "-I${_dir}") + endforeach() + + set(_iluvatar_call_instantiation_defs -DWITH_ILUVATAR=1) + if(WITH_CPU) + list(APPEND _iluvatar_call_instantiation_defs -DWITH_CPU=1) + endif() + if(WITH_TORCH) + list(APPEND _iluvatar_call_instantiation_defs -DWITH_TORCH=1) + endif() + if(DEFINED TORCH_CXX11_ABI) + list(APPEND _iluvatar_call_instantiation_defs + "-D_GLIBCXX_USE_CXX11_ABI=${TORCH_CXX11_ABI}") + endif() + set(ILUVATAR_CALL_INSTANTIATION_OBJECTS) + set(_iluvatar_call_instantiation_object_dir + "${CMAKE_CURRENT_BINARY_DIR}/iluvatar_call_instantiation_objs") + foreach(_src IN LISTS OPERATOR_CALL_INSTANTIATION_SOURCES) + get_filename_component(_name "${_src}" NAME_WE) + set(_obj "${_iluvatar_call_instantiation_object_dir}/${_name}.o") + set(_dep "${_obj}.d") + set(_depfile_arg) + if(CMAKE_GENERATOR MATCHES "Ninja") + set(_depfile_arg DEPFILE "${_dep}") + endif() + add_custom_command( + OUTPUT "${_obj}" + COMMAND ${CMAKE_COMMAND} -E make_directory + "${_iluvatar_call_instantiation_object_dir}" + COMMAND ${ILUVATAR_CUDA_COMPILER} + ${_iluvatar_call_instantiation_defs} + ${_iluvatar_call_instantiation_include_flags} + ${ILUVATAR_CUDA_FLAGS} + -MMD -MF "${_dep}" + -c "${_src}" -o "${_obj}" + DEPENDS "${_src}" + ${_depfile_arg} + COMMENT "Compiling ${_name}.cc with CoreX clang++" + VERBATIM + ) + list(APPEND ILUVATAR_CALL_INSTANTIATION_OBJECTS "${_obj}") + endforeach() + + set_source_files_properties(${ILUVATAR_CALL_INSTANTIATION_OBJECTS} + PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) + target_sources(infiniops PRIVATE ${ILUVATAR_CALL_INSTANTIATION_OBJECTS}) + else() + target_sources(infiniops PRIVATE ${OPERATOR_CALL_INSTANTIATION_SOURCES}) + endif() +endif() + +if(GENERATE_PYTHON_BINDINGS) file(GLOB_RECURSE PYBIND11_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/generated/bindings/*.cc") @@ -727,3 +804,44 @@ if(GENERATE_PYTHON_BINDINGS) DESTINATION .) endif() endif() + +install(TARGETS infiniops + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS) + install(FILES + ${PROJECT_SOURCE_DIR}/generated/include/infini/operator_call_instantiations.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini + ) +endif() + +file(GLOB INFINIOPS_PUBLIC_CORE_HEADERS CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/*.h") + +install(FILES ${INFINIOPS_PUBLIC_CORE_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/base/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/base + FILES_MATCHING PATTERN "*.h" +) + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/common/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common + FILES_MATCHING PATTERN "*.h" +) + +if(EXISTS ${PROJECT_SOURCE_DIR}/generated/base) + install(DIRECTORY ${PROJECT_SOURCE_DIR}/generated/base/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/base + FILES_MATCHING PATTERN "*.h" + ) +endif() diff --git a/src/operator.h b/src/operator.h index 15b95697d..257d62de7 100644 --- a/src/operator.h +++ b/src/operator.h @@ -191,7 +191,8 @@ class Operator : public OperatorBase { } template - static auto Call(const Handle& handle, const Config& config, Args&&... args) { + static auto Call(const Handle& handle, const Config& config, + const Args&... args) { static std::unordered_map> cache; static std::size_t generation{0}; @@ -206,20 +207,17 @@ class Operator : public OperatorBase { auto it{cache.find(key)}; if (it == cache.end()) { - // Pass args as lvalue refs so they remain valid for the `operator()` call - // below. Forwarding rvalue temporaries into `Make()` would leave the args - // in a moved-from (empty) state before `operator()` can use them. it = cache.emplace(std::move(key), Make(config, args...)).first; } auto& op{it->second}; - return (*op)(handle, std::forward(args)...); + return (*op)(handle, args...); } template - static auto Call(const Tensor tensor, Args&&... args) { - return Call({}, {}, tensor, std::forward(args)...); + static auto Call(const Tensor tensor, const Args&... args) { + return Call({}, {}, tensor, args...); } static std::vector active_implementation_indices( @@ -241,18 +239,18 @@ class Operator : public OperatorBase { } template - auto operator()(const Handle& handle, Args&&... args) { + auto operator()(const Handle& handle, const Args&... args) { set_handle(handle); set_stream(handle.stream()); set_workspace(handle.workspace()); set_workspace_size_in_bytes(handle.workspace_size_in_bytes()); - return operator()(std::forward(args)...); + return operator()(args...); } template - auto operator()(Args&&... args) const { - return (*static_cast(this))(std::forward(args)...); + auto operator()(const Args&... args) const { + return (*static_cast(this))(args...); } protected: diff --git a/tests/test_cpp_api.py b/tests/test_cpp_api.py new file mode 100644 index 000000000..86c0c1600 --- /dev/null +++ b/tests/test_cpp_api.py @@ -0,0 +1,115 @@ +import os +import subprocess +import textwrap +from pathlib import Path + +import pytest + + +def test_cpp_operator_call_instantiation_smoke(tmp_path): + install_prefix = _install_prefix() + include_dir = install_prefix / "include" + library_dir = _library_dir(install_prefix) + source = tmp_path / "add_smoke.cc" + binary = tmp_path / "add_smoke" + source.write_text(_ADD_SMOKE_SOURCE) + + _run( + [ + _compiler("CXX", "c++"), + "-std=c++17", + "-Werror", + f"-I{include_dir}", + str(source), + f"-L{library_dir}", + "-linfiniops", + f"-Wl,-rpath,{library_dir}", + "-o", + str(binary), + ] + ) + _run([str(binary)]) + + +def _install_prefix(): + prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") + + if prefix: + return Path(prefix) + + pytest.skip("`INFINIOPS_INSTALL_PREFIX` is not set.") + + +def _library_dir(prefix): + for name in ("lib", "lib64"): + library_dir = prefix / name + if (library_dir / "libinfiniops.so").exists(): + return library_dir + + pytest.skip(f"`libinfiniops.so` was not found under `{prefix}`.") + + +def _compiler(env_name, default): + compiler = os.environ.get(env_name, default) + + if not compiler: + pytest.skip(f"`{env_name}` is not configured.") + + return compiler + + +def _run(command): + try: + subprocess.run(command, check=True, text=True, capture_output=True) + except FileNotFoundError as error: + pytest.skip(f"`{command[0]}` is not available: {error}") + except subprocess.CalledProcessError as error: + output = "\n".join((error.stdout, error.stderr)).strip() + raise AssertionError(output) from error + + +_ADD_SMOKE_SOURCE = textwrap.dedent( + r""" + #include + + #include + + int main() { + float input_data[3] = {1.0f, 2.0f, 3.0f}; + float other_data[3] = {4.0f, 5.0f, 6.0f}; + float output_data[3] = {0.0f, 0.0f, 0.0f}; + + const infini::ops::Tensor::Shape shape{3}; + const infini::ops::Device device{infini::ops::Device::Type::kCpu}; + const infini::ops::DataType data_type{infini::ops::DataType::kFloat32}; + + infini::ops::Tensor input(input_data, shape, data_type, device); + infini::ops::Tensor other(other_data, shape, data_type, device); + infini::ops::Tensor output(output_data, shape, data_type, device); + infini::ops::Handle handle; + infini::ops::Config config; + + infini::ops::Add::Call(handle, config, input, other, output); + + if (std::fabs(output_data[0] - 5.0f) > 1e-6f || + std::fabs(output_data[1] - 7.0f) > 1e-6f || + std::fabs(output_data[2] - 9.0f) > 1e-6f) { + return 1; + } + + output_data[0] = 0.0f; + output_data[1] = 0.0f; + output_data[2] = 0.0f; + + infini::ops::Add::Call(input, other, output); + + if (std::fabs(output_data[0] - 5.0f) > 1e-6f || + std::fabs(output_data[1] - 7.0f) > 1e-6f || + std::fabs(output_data[2] - 9.0f) > 1e-6f) { + return 1; + } + + return 0; + } + """ +).lstrip()