diff --git a/CMakeLists.txt b/CMakeLists.txt index fe478691b..4cdc54878 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.18) -project(InfiniOps VERSION 0.1.0 LANGUAGES CXX) - -include(GNUInstallDirs) +project(InfiniOps LANGUAGES CXX) if(POLICY CMP0116) cmake_policy(SET CMP0116 NEW) @@ -36,7 +34,8 @@ option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requi option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) -option(GENERATE_CPP_OPERATOR_API "Generate public C++ operator API" ON) +option(GENERATE_OPERATOR_CALL_INSTANTIATIONS + "Generate explicit operator call instantiations" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") @@ -322,13 +321,10 @@ if(WITH_ILUVATAR) if(NOT ILUVATAR_CUDA_COMPILER) message(FATAL_ERROR "`WITH_ILUVATAR` is `ON` but CoreX `clang++` was not found.") endif() - get_filename_component(ILUVATAR_CUDA_BIN_DIR "${ILUVATAR_CUDA_COMPILER}" DIRECTORY) - get_filename_component(ILUVATAR_CUDA_ROOT "${ILUVATAR_CUDA_BIN_DIR}/.." ABSOLUTE) - set(CUDAToolkit_ROOT "${ILUVATAR_CUDA_ROOT}" CACHE PATH "Iluvatar CoreX toolkit root") set(ILUVATAR_CUDA_FLAGS - "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=${ILUVATAR_CUDA_ROOT};-x;ivcore" + "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=/usr/local/corex;-x;ivcore" CACHE STRING "Iluvatar CUDA compiler flags") - message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}, toolkit ${ILUVATAR_CUDA_ROOT}") + message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}") find_package(CUDAToolkit REQUIRED) endif() @@ -446,7 +442,6 @@ if(WITH_MOORE) find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED) - find_library(MUSA_OPENMP_LIB NAMES omp iomp5 HINTS "${MUSA_ROOT}/lib" REQUIRED) endif() if(WITH_CAMBRICON) @@ -481,7 +476,6 @@ endif() # If all other platforms are not enabled, CPU is enabled by default. if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) - set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) add_compile_definitions(WITH_CPU=1) endif() diff --git a/cmake/InfiniOpsConfig.cmake.in b/cmake/InfiniOpsConfig.cmake.in deleted file mode 100644 index af1f50794..000000000 --- a/cmake/InfiniOpsConfig.cmake.in +++ /dev/null @@ -1,3 +0,0 @@ -@PACKAGE_INIT@ - -include("${CMAKE_CURRENT_LIST_DIR}/InfiniOpsTargets.cmake") diff --git a/cmake/infiniops.pc.in b/cmake/infiniops.pc.in deleted file mode 100644 index 09b544ef0..000000000 --- a/cmake/infiniops.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - -Name: InfiniOps -Description: InfiniOps operator library -Version: @PROJECT_VERSION@ -Libs: -L${libdir} -linfiniops -Cflags: -I${includedir} diff --git a/include/infini/ops.h b/include/infini/ops.h index db17bd335..ed8181282 100644 --- a/include/infini/ops.h +++ b/include/infini/ops.h @@ -2,7 +2,7 @@ #define INFINI_OPS_H_ #ifdef __cplusplus -#include +#include #endif #endif // INFINI_OPS_H_ diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index b15041baa..f5734b7ad 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -9,12 +9,8 @@ import subprocess import textwrap -try: - import clang.cindex - from clang.cindex import CursorKind -except ImportError: - clang = None - CursorKind = None +import clang.cindex +from clang.cindex import CursorKind _SRC_DIR = pathlib.Path("src") @@ -33,8 +29,6 @@ _INCLUDE_DIR = _GENERATION_DIR / "include" -_PUBLIC_INCLUDE_DIR = _INCLUDE_DIR / "infini" - _INDENTATION = " " @@ -80,30 +74,8 @@ def _find_base_header(op_name): raise FileNotFoundError(f"no base header for op {op_name!r}") -class _ParsedType: - def __init__(self, spelling): - self.spelling = spelling - - -class _ParsedArgument: - def __init__(self, type_spelling, spelling): - self.type = _ParsedType(type_spelling) - self.spelling = spelling - - -class _ParsedFunction: - def __init__(self, arguments): - self._arguments = arguments - - def get_arguments(self): - return self._arguments - - class _OperatorExtractor: def __call__(self, op_name): - if clang is None: - return _parse_operator_header(op_name) - index = clang.cindex.Index.create() args = ( "-std=c++17", @@ -143,131 +115,6 @@ def _find(node, op_name): yield from _OperatorExtractor._find(child, op_name) -def _parse_operator_header(op_name): - pascal_case_op_name = _snake_to_pascal(op_name) - source = _strip_cpp_comments(_find_base_header(op_name).read_text()) - class_body = _extract_class_body(source, pascal_case_op_name) - constructors = [ - _ParsedFunction(_parse_parameter_list(params)) - for params in _find_signature_parameters( - class_body, rf"(?:explicit\s+)?{pascal_case_op_name}\s*\(" - ) - ] - calls = [ - _ParsedFunction(_parse_parameter_list(params)) - for params in _find_signature_parameters( - class_body, r"(?:virtual\s+)?void\s+operator\s*\(\s*\)\s*\(" - ) - ] - - return _Operator(op_name, constructors, calls) - - -def _strip_cpp_comments(source): - source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL) - return re.sub(r"//.*", "", source) - - -def _extract_class_body(source, class_name): - match = re.search(rf"\bclass\s+{class_name}\b[^{{]*{{", source) - - if match is None: - raise ValueError(f"no class definition for {class_name!r}") - - start = match.end() - depth = 1 - index = start - - while index < len(source): - char = source[index] - - if char == "{": - depth += 1 - elif char == "}": - depth -= 1 - if depth == 0: - return source[start:index] - - index += 1 - - raise ValueError(f"unterminated class definition for {class_name!r}") - - -def _find_signature_parameters(source, pattern): - params = [] - - for match in re.finditer(pattern, source): - opening_paren = match.end() - 1 - - if opening_paren < 0 or source[opening_paren] != "(": - continue - - closing_paren = _find_matching_delimiter(source, opening_paren, "(", ")") - params.append(source[opening_paren + 1 : closing_paren]) - - return params - - -def _find_matching_delimiter(source, start, opening, closing): - depth = 0 - - for index in range(start, len(source)): - char = source[index] - - if char == opening: - depth += 1 - elif char == closing: - depth -= 1 - if depth == 0: - return index - - raise ValueError(f"unmatched delimiter {opening!r}") - - -def _parse_parameter_list(params): - arguments = [] - - for param in _split_top_level(params, ","): - param = _strip_default_argument(param.strip()) - - if not param or param == "void": - continue - - match = re.match(r"(.+?[\s*&]+)([A-Za-z_][A-Za-z0-9_]*)$", param) - - if match is None: - raise ValueError(f"could not parse parameter {param!r}") - - arguments.append(_ParsedArgument(match.group(1).strip(), match.group(2))) - - return arguments - - -def _split_top_level(text, delimiter): - parts = [] - start = 0 - depth = 0 - pairs = {"<": ">", "(": ")", "[": "]", "{": "}"} - closing = {value: key for key, value in pairs.items()} - - for index, char in enumerate(text): - if char in pairs: - depth += 1 - elif char in closing: - depth -= 1 - elif char == delimiter and depth == 0: - parts.append(text[start:index]) - start = index + 1 - - parts.append(text[start:]) - return parts - - -def _strip_default_argument(param): - parts = _split_top_level(param, "=") - return parts[0].strip() - - class _Operator: def __init__(self, name, constructors, calls): self.name = name @@ -421,7 +268,7 @@ def _generate_call(op_name, call, method=True): f" }}\n" f" Config config;\n" f" config.set_implementation_index(implementation_index);\n" - f" return functional::{pascal_case_op_name}(handle, config, {call_args});\n" + f" return generated_dispatch::Call{pascal_case_op_name}(handle, config, {call_args});\n" f' }}, {py_args_str}py::kw_only(), py::arg("stream") = 0, py::arg("implementation_index") = 0);' ) @@ -481,7 +328,6 @@ def _overload_order_key(node): #include "base/{op_name}.h" #include "config.h" -#include "infini/ops.h" #include "generated/bindings/generated_dispatch.h" #include "handle.h" #include "pybind11_utils.h" @@ -804,54 +650,6 @@ def _append_optional_params(prefix, params): return declarations, definitions -def _generate_functional_entries(operator): - def _generate_params(node): - return ", ".join( - f"{arg.type.spelling} {arg.spelling}" - for arg in node.get_arguments() - if arg.spelling != "stream" - ) - - def _generate_arguments(node): - return ", ".join( - arg.spelling for arg in node.get_arguments() if arg.spelling != "stream" - ) - - def _append_optional_args(prefix, args): - if args: - return f"{prefix}, {args}" - - return prefix - - def _append_optional_params(prefix, params): - if params: - return f"{prefix}, {params}" - - return prefix - - pascal_case_op_name = _snake_to_pascal(operator.name) - op_type = f"::infini::ops::{pascal_case_op_name}" - operator_type = f"::infini::ops::Operator<{op_type}>" - declarations = [] - definitions = [] - - for call in operator.calls: - params = _generate_params(call) - args = _generate_arguments(call) - function_params = _append_optional_params( - "const Handle& handle, const Config& config", params - ) - - declarations.append(f"void {pascal_case_op_name}({function_params});") - definitions.append( - f"""void {pascal_case_op_name}({function_params}) {{ - return {operator_type}::Call({_append_optional_args("handle, config", args)}); -}}""" - ) - - return declarations, definitions - - def _generate_generated_dispatch_header(op_names, devices, declarations): header_base_includes = "\n".join( f'#include "base/{op_name}.h"' for op_name in op_names @@ -904,33 +702,86 @@ def _generate_generated_dispatch_source(impl_paths, definitions): """ -def _generate_functional_header(declarations): - return f"""#ifndef INFINI_OPS_FUNCTIONAL_OPS_H_ -#define INFINI_OPS_FUNCTIONAL_OPS_H_ +def _strip_top_level_const(type_spelling): + type_spelling = " ".join(type_spelling.split()) + + while type_spelling.startswith("const "): + type_spelling = type_spelling[len("const ") :] + + return type_spelling + + +def _generate_operator_call_instantiation_entries(operator): + def _generate_template_arguments(node): + return ", ".join( + _strip_top_level_const(arg.type.spelling) + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _generate_parameters(node): + return ", ".join( + f"const {_strip_top_level_const(arg.type.spelling)}& {arg.spelling}" + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _append_optional_params(prefix, params): + if params: + return f"{prefix}, {params}" + + return prefix + + pascal_case_op_name = _snake_to_pascal(operator.name) + declarations = [] + definitions = [] + + for call in operator.calls: + template_arguments = _generate_template_arguments(call) + params = _generate_parameters(call) + function_params = _append_optional_params( + "const Handle& handle, const Config& config", params + ) + instantiation = ( + f"Operator<{pascal_case_op_name}>::Call<{template_arguments}>" + f"({function_params})" + ) + + declarations.append(f"extern template auto {instantiation};") + definitions.append(f"template auto {instantiation};") + + return declarations, definitions + + +def _generate_operator_call_instantiation_header(op_names, declarations): + header_base_includes = "\n".join( + f'#include "base/{op_name}.h"' for op_name in op_names + ) + + return f"""#ifndef INFINI_OPS_OPERATOR_CALL_INSTANTIATIONS_H_ +#define INFINI_OPS_OPERATOR_CALL_INSTANTIATIONS_H_ -#include #include #include #include #include "config.h" -#include "data_type.h" -#include "device.h" #include "handle.h" -#include "tensor.h" +#include "operator.h" -namespace infini::ops::functional {{ +{header_base_includes} + +namespace infini::ops {{ {chr(10).join(declarations)} -}} // namespace infini::ops::functional +}} // namespace infini::ops #endif """ -def _generate_functional_source(op_names, devices, impl_paths, definitions): - base_includes = "\n".join(f'#include "base/{op_name}.h"' for op_name in op_names) +def _generate_operator_call_instantiation_source(devices, impl_paths, definitions): device_includes = "\n".join( f'#include "{path}"' for path in _device_marker_headers(devices) ) @@ -938,19 +789,18 @@ def _generate_functional_source(op_names, devices, impl_paths, definitions): f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths ) - return f"""#include "infini/functional_ops.h" + return f"""#include "infini/operator_call_instantiations.h" // clang-format off {device_includes} -{base_includes} {impl_includes} // clang-format on -namespace infini::ops::functional {{ +namespace infini::ops {{ {chr(10).join(definitions)} -}} // namespace infini::ops::functional +}} // namespace infini::ops """ @@ -958,6 +808,7 @@ def _device_marker_headers(devices): paths = { "cpu": "native/cpu/device_.h", "nvidia": "native/cuda/nvidia/device_.h", + "hygon": "native/cuda/hygon/device_.h", "cambricon": "native/cambricon/device_.h", "ascend": "native/ascend/device_.h", "metax": "native/cuda/metax/device_.h", @@ -1073,9 +924,10 @@ def _generate_op_artifacts(item): dispatch_declarations, dispatch_definitions = _generate_generated_dispatch_entries( operator ) - functional_declarations, functional_definitions = _generate_functional_entries( - operator - ) + ( + call_instantiation_declarations, + call_instantiation_definitions, + ) = _generate_operator_call_instantiation_entries(operator) return { "op_name": op_name, @@ -1087,8 +939,8 @@ def _generate_op_artifacts(item): "legacy_c_header": legacy_c_header, "dispatch_declarations": dispatch_declarations, "dispatch_definitions": dispatch_definitions, - "functional_declarations": functional_declarations, - "functional_definitions": functional_definitions, + "call_instantiation_declarations": call_instantiation_declarations, + "call_instantiation_definitions": call_instantiation_definitions, "impl_paths": impl_paths, } @@ -1159,8 +1011,6 @@ def _dispatch_gen_batch_size(): directory.mkdir(parents=True) - _PUBLIC_INCLUDE_DIR.mkdir(parents=True, exist_ok=True) - ops_json = pathlib.Path("ops.json") if ops_json.exists(): @@ -1188,10 +1038,10 @@ def _dispatch_gen_batch_size(): for artifact in artifacts for declaration in artifact["dispatch_declarations"] ] - functional_declarations = [ + call_instantiation_declarations = [ declaration for artifact in artifacts - for declaration in artifact["functional_declarations"] + for declaration in artifact["call_instantiation_declarations"] ] use_monolithic_bindings = _use_monolithic_bindings() op_includes = [] @@ -1222,8 +1072,13 @@ def _dispatch_gen_batch_size(): ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) - functional_header = _generate_functional_header(functional_declarations) - (_PUBLIC_INCLUDE_DIR / "functional_ops.h").write_text(functional_header) + call_instantiation_header = _generate_operator_call_instantiation_header( + op_names, call_instantiation_declarations + ) + (_INCLUDE_DIR / "infini").mkdir(exist_ok=True) + (_INCLUDE_DIR / "infini" / "operator_call_instantiations.h").write_text( + call_instantiation_header + ) dispatch_batch_size = _dispatch_gen_batch_size() @@ -1246,20 +1101,27 @@ def _dispatch_gen_batch_size(): dispatch_source ) - functional_definitions = [ + for call_instantiation_batch_index, start in enumerate( + range(0, len(artifacts), dispatch_batch_size) + ): + batch = artifacts[start : start + dispatch_batch_size] + impl_paths = list( + dict.fromkeys( + impl_path for artifact in batch for impl_path in artifact["impl_paths"] + ) + ) + definitions = [ definition for artifact in batch - for definition in artifact["functional_definitions"] + for definition in artifact["call_instantiation_definitions"] ] - functional_source = _generate_functional_source( - [artifact["op_name"] for artifact in batch], - args.devices, - impl_paths, - functional_definitions, - ) - (_GENERATED_SRC_DIR / f"functional_ops_{dispatch_batch_index}.cc").write_text( - functional_source + call_instantiation_source = _generate_operator_call_instantiation_source( + args.devices, impl_paths, definitions ) + ( + _GENERATED_SRC_DIR + / f"operator_call_instantiations_{call_instantiation_batch_index}.cc" + ).write_text(call_instantiation_source) bind_func_calls = "\n".join( f"{bind_func_name}(m);" for bind_func_name in bind_func_names diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3016e5eab..4b0ca3028 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(infiniops SHARED) +include(GNUInstallDirs) + file(GLOB BASE_SRCS CONFIGURE_DEPENDS "*.cc") target_sources(infiniops PRIVATE ${BASE_SRCS}) @@ -175,11 +177,7 @@ if(WITH_MOORE) target_sources(infiniops PRIVATE ${MOORE_SOURCES}) target_include_directories(infiniops PUBLIC "${MUSA_ROOT}/include") - target_link_libraries(infiniops PUBLIC - ${MUSA_LIB} - ${MUSART_LIB} - ${MUBLAS_LIB} - ${MUSA_OPENMP_LIB}) + target_link_libraries(infiniops PUBLIC ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB}) list(APPEND DEVICE_LIST "moore") endif() @@ -211,15 +209,10 @@ if(WITH_CAMBRICON) endforeach() get_directory_property(CAMBRICON_OBJECT_FILES CAMBRICON_OBJECTS) if(CAMBRICON_OBJECT_FILES) - set_source_files_properties(${CAMBRICON_OBJECT_FILES} - PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) target_sources(infiniops PRIVATE ${CAMBRICON_OBJECT_FILES}) endif() else() - if(CAMBRICON_MLU_SOURCES) - message(FATAL_ERROR - "cncc compiler not found. Cambricon .mlu kernels cannot be compiled.") - endif() + message(WARNING "cncc compiler not found. MLU kernels will not be compiled.") endif() target_compile_definitions(infiniops PRIVATE WITH_CAMBRICON=1) @@ -269,7 +262,6 @@ if(WITH_ASCEND) else() message(FATAL_ERROR "libascend_hal.so not found (tried ${ASCEND_HAL_REAL}, ${ASCEND_HAL_STUB}, and ${ASCEND_HAL_DEVLIB})") endif() - get_filename_component(ASCEND_HAL_DIR "${ASCEND_HAL_LIB}" DIRECTORY) target_include_directories(infiniops PUBLIC "${ASCEND_HOME}/include" @@ -360,7 +352,7 @@ if(WITH_TORCH) endif() message(STATUS "Generating torch op wrappers - done") - file(GLOB_RECURSE TORCH_SOURCES + file(GLOB_RECURSE TORCH_SOURCES CONFIGURE_DEPENDS "torch/*.cc" "torch/*.cpp" "${PROJECT_SOURCE_DIR}/generated/torch/*.cc" "${PROJECT_SOURCE_DIR}/generated/torch/*.cpp" @@ -406,7 +398,7 @@ if(WITH_TORCH) target_link_libraries(infiniops PUBLIC ${TORCH_LIBRARIES}) target_include_directories(infiniops PUBLIC ${TORCH_INCLUDE_DIRS} - $ + ${PROJECT_SOURCE_DIR}/generated ) # Each generated `.cc` instantiates `at::_out(...)`, which @@ -518,12 +510,12 @@ target_include_directories(infiniops $ ) -if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS OR GENERATE_PYTHON_BINDINGS) find_package(Python COMPONENTS Interpreter REQUIRED) - # Always regenerate wrappers so the generated functional API and pybind11 - # dispatch code match the active device list. Stale generated files (e.g., - # committed for one platform) would omit specializations for other enabled - # backends, causing link-time or runtime failures. + # Always regenerate wrappers so emitted call instantiations and bindings + # match the active device list. Stale generated files would omit + # specializations for enabled backends, causing link-time or runtime + # failures. set(GENERATOR_ARGS --devices ${DEVICE_LIST}) if(WITH_TORCH) @@ -544,42 +536,44 @@ if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) else() message(STATUS "Generating wrappers - done") endif() +endif() - file(GLOB_RECURSE FUNCTIONAL_API_SOURCES - "${PROJECT_SOURCE_DIR}/generated/src/functional_ops_*.cc") +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS) + file(GLOB_RECURSE OPERATOR_CALL_INSTANTIATION_SOURCES CONFIGURE_DEPENDS + "${PROJECT_SOURCE_DIR}/generated/src/operator_call_instantiations_*.cc") - if(WITH_NVIDIA) - set_source_files_properties(${FUNCTIONAL_API_SOURCES} + if(WITH_NVIDIA OR WITH_HYGON) + set_source_files_properties(${OPERATOR_CALL_INSTANTIATION_SOURCES} PROPERTIES LANGUAGE CUDA) - target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) + target_sources(infiniops PRIVATE ${OPERATOR_CALL_INSTANTIATION_SOURCES}) elseif(WITH_ILUVATAR) - set(_iluvatar_functional_include_flags + set(_iluvatar_call_instantiation_include_flags "-I${CMAKE_CURRENT_SOURCE_DIR}" "-I${PROJECT_SOURCE_DIR}" "-I${PROJECT_SOURCE_DIR}/generated" "-I${PROJECT_SOURCE_DIR}/generated/include") foreach(_dir IN LISTS TORCH_INCLUDE_DIRS CUDAToolkit_INCLUDE_DIRS) - list(APPEND _iluvatar_functional_include_flags "-I${_dir}") + list(APPEND _iluvatar_call_instantiation_include_flags "-I${_dir}") endforeach() - set(_iluvatar_functional_defs -DWITH_ILUVATAR=1) + set(_iluvatar_call_instantiation_defs -DWITH_ILUVATAR=1) if(WITH_CPU) - list(APPEND _iluvatar_functional_defs -DWITH_CPU=1) + list(APPEND _iluvatar_call_instantiation_defs -DWITH_CPU=1) endif() if(WITH_TORCH) - list(APPEND _iluvatar_functional_defs -DWITH_TORCH=1) + list(APPEND _iluvatar_call_instantiation_defs -DWITH_TORCH=1) endif() if(DEFINED TORCH_CXX11_ABI) - list(APPEND _iluvatar_functional_defs + list(APPEND _iluvatar_call_instantiation_defs "-D_GLIBCXX_USE_CXX11_ABI=${TORCH_CXX11_ABI}") endif() - set(ILUVATAR_FUNCTIONAL_OBJECTS) - set(_iluvatar_functional_object_dir - "${CMAKE_CURRENT_BINARY_DIR}/iluvatar_functional_objs") - foreach(_src IN LISTS FUNCTIONAL_API_SOURCES) + set(ILUVATAR_CALL_INSTANTIATION_OBJECTS) + set(_iluvatar_call_instantiation_object_dir + "${CMAKE_CURRENT_BINARY_DIR}/iluvatar_call_instantiation_objs") + foreach(_src IN LISTS OPERATOR_CALL_INSTANTIATION_SOURCES) get_filename_component(_name "${_src}" NAME_WE) - set(_obj "${_iluvatar_functional_object_dir}/${_name}.o") + set(_obj "${_iluvatar_call_instantiation_object_dir}/${_name}.o") set(_dep "${_obj}.d") set(_depfile_arg) if(CMAKE_GENERATOR MATCHES "Ninja") @@ -588,10 +582,10 @@ if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) add_custom_command( OUTPUT "${_obj}" COMMAND ${CMAKE_COMMAND} -E make_directory - "${_iluvatar_functional_object_dir}" + "${_iluvatar_call_instantiation_object_dir}" COMMAND ${ILUVATAR_CUDA_COMPILER} - ${_iluvatar_functional_defs} - ${_iluvatar_functional_include_flags} + ${_iluvatar_call_instantiation_defs} + ${_iluvatar_call_instantiation_include_flags} ${ILUVATAR_CUDA_FLAGS} -MMD -MF "${_dep}" -c "${_src}" -o "${_obj}" @@ -600,19 +594,19 @@ if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) COMMENT "Compiling ${_name}.cc with CoreX clang++" VERBATIM ) - list(APPEND ILUVATAR_FUNCTIONAL_OBJECTS "${_obj}") + list(APPEND ILUVATAR_CALL_INSTANTIATION_OBJECTS "${_obj}") endforeach() - set_source_files_properties(${ILUVATAR_FUNCTIONAL_OBJECTS} + set_source_files_properties(${ILUVATAR_CALL_INSTANTIATION_OBJECTS} PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) - target_sources(infiniops PRIVATE ${ILUVATAR_FUNCTIONAL_OBJECTS}) + target_sources(infiniops PRIVATE ${ILUVATAR_CALL_INSTANTIATION_OBJECTS}) else() - target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) + target_sources(infiniops PRIVATE ${OPERATOR_CALL_INSTANTIATION_SOURCES}) endif() endif() if(GENERATE_PYTHON_BINDINGS) - file(GLOB_RECURSE PYBIND11_SOURCES + file(GLOB_RECURSE PYBIND11_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/generated/bindings/*.cc") set(PYBIND11_DISPATCH_SOURCES) @@ -794,12 +788,6 @@ if(GENERATE_PYTHON_BINDINGS) if(WITH_TORCH) list(APPEND _INFINIOPS_INSTALL_RPATH ${TORCH_RUNTIME_DIRS}) endif() - if(WITH_MOORE) - list(APPEND _INFINIOPS_INSTALL_RPATH "${MUSA_ROOT}/lib") - endif() - if(WITH_ASCEND) - list(APPEND _INFINIOPS_INSTALL_RPATH "${ASCEND_HOME}/lib64" "${ASCEND_HAL_DIR}") - endif() set_target_properties(infiniops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") set_target_properties(ops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") @@ -817,70 +805,43 @@ if(GENERATE_PYTHON_BINDINGS) endif() endif() -include(CMakePackageConfigHelpers) - -configure_file( - ${PROJECT_SOURCE_DIR}/cmake/infiniops.pc.in - ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc - @ONLY -) - -configure_package_config_file( - ${PROJECT_SOURCE_DIR}/cmake/InfiniOpsConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps -) - -write_basic_package_version_file( - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake - VERSION ${PROJECT_VERSION} - COMPATIBILITY SameMajorVersion -) - install(TARGETS infiniops - EXPORT InfiniOpsTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) -install(FILES ${PROJECT_SOURCE_DIR}/include/infini/ops.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) - install(FILES ${PROJECT_SOURCE_DIR}/generated/include/infini/functional_ops.h +if(GENERATE_OPERATOR_CALL_INSTANTIATIONS) + install(FILES + ${PROJECT_SOURCE_DIR}/generated/include/infini/operator_call_instantiations.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini ) endif() -install(FILES - ${PROJECT_SOURCE_DIR}/src/config.h - ${PROJECT_SOURCE_DIR}/src/data_type.h - ${PROJECT_SOURCE_DIR}/src/device.h - ${PROJECT_SOURCE_DIR}/src/handle.h - ${PROJECT_SOURCE_DIR}/src/hash.h - ${PROJECT_SOURCE_DIR}/src/tensor.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) +file(GLOB INFINIOPS_PUBLIC_CORE_HEADERS CONFIGURE_DEPENDS + "${CMAKE_CURRENT_SOURCE_DIR}/*.h") -install(FILES - ${PROJECT_SOURCE_DIR}/src/common/constexpr_map.h - ${PROJECT_SOURCE_DIR}/src/common/traits.h - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common +install(FILES ${INFINIOPS_PUBLIC_CORE_HEADERS} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/base/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/base + FILES_MATCHING PATTERN "*.h" ) -install(EXPORT InfiniOpsTargets - NAMESPACE InfiniOps:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/common/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common + FILES_MATCHING PATTERN "*.h" ) -install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps -) +if(EXISTS ${PROJECT_SOURCE_DIR}/generated/base) + install(DIRECTORY ${PROJECT_SOURCE_DIR}/generated/base/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/base + FILES_MATCHING PATTERN "*.h" + ) +endif() diff --git a/src/operator.h b/src/operator.h index 15b95697d..257d62de7 100644 --- a/src/operator.h +++ b/src/operator.h @@ -191,7 +191,8 @@ class Operator : public OperatorBase { } template - static auto Call(const Handle& handle, const Config& config, Args&&... args) { + static auto Call(const Handle& handle, const Config& config, + const Args&... args) { static std::unordered_map> cache; static std::size_t generation{0}; @@ -206,20 +207,17 @@ class Operator : public OperatorBase { auto it{cache.find(key)}; if (it == cache.end()) { - // Pass args as lvalue refs so they remain valid for the `operator()` call - // below. Forwarding rvalue temporaries into `Make()` would leave the args - // in a moved-from (empty) state before `operator()` can use them. it = cache.emplace(std::move(key), Make(config, args...)).first; } auto& op{it->second}; - return (*op)(handle, std::forward(args)...); + return (*op)(handle, args...); } template - static auto Call(const Tensor tensor, Args&&... args) { - return Call({}, {}, tensor, std::forward(args)...); + static auto Call(const Tensor tensor, const Args&... args) { + return Call({}, {}, tensor, args...); } static std::vector active_implementation_indices( @@ -241,18 +239,18 @@ class Operator : public OperatorBase { } template - auto operator()(const Handle& handle, Args&&... args) { + auto operator()(const Handle& handle, const Args&... args) { set_handle(handle); set_stream(handle.stream()); set_workspace(handle.workspace()); set_workspace_size_in_bytes(handle.workspace_size_in_bytes()); - return operator()(std::forward(args)...); + return operator()(args...); } template - auto operator()(Args&&... args) const { - return (*static_cast(this))(std::forward(args)...); + auto operator()(const Args&... args) const { + return (*static_cast(this))(args...); } protected: diff --git a/tests/test_cpp_api.py b/tests/test_cpp_api.py index 62eb8969d..86c0c1600 100644 --- a/tests/test_cpp_api.py +++ b/tests/test_cpp_api.py @@ -6,7 +6,7 @@ import pytest -def test_cpp_functional_add_smoke(tmp_path): +def test_cpp_operator_call_instantiation_smoke(tmp_path): install_prefix = _install_prefix() include_dir = install_prefix / "include" library_dir = _library_dir(install_prefix) @@ -72,7 +72,7 @@ def _run(command): r""" #include - #include + #include int main() { float input_data[3] = {1.0f, 2.0f, 3.0f}; @@ -89,12 +89,26 @@ def _run(command): infini::ops::Handle handle; infini::ops::Config config; - infini::ops::functional::Add(handle, config, input, other, output); + infini::ops::Add::Call(handle, config, input, other, output); - if (output_data[0] != 5.0f || output_data[1] != 7.0f || - output_data[2] != 9.0f) { + if (std::fabs(output_data[0] - 5.0f) > 1e-6f || + std::fabs(output_data[1] - 7.0f) > 1e-6f || + std::fabs(output_data[2] - 9.0f) > 1e-6f) { return 1; } + + output_data[0] = 0.0f; + output_data[1] = 0.0f; + output_data[2] = 0.0f; + + infini::ops::Add::Call(input, other, output); + + if (std::fabs(output_data[0] - 5.0f) > 1e-6f || + std::fabs(output_data[1] - 7.0f) > 1e-6f || + std::fabs(output_data[2] - 9.0f) > 1e-6f) { + return 1; + } + return 0; } """