Skip to content

Commit 77b13ef

Browse files
committed
feat(hygon): add backend infrastructure
1 parent 41812a1 commit 77b13ef

13 files changed

Lines changed: 487 additions & 10 deletions

File tree

.github/ci_config.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,41 @@ platforms:
149149
- name: test
150150
run: pytest tests/ --devices cambricon -n 4 -v --tb=short --junitxml=/workspace/results/test-results.xml
151151

152+
hygon:
153+
runner_label: Hygon
154+
execution_mode: agent_local
155+
image:
156+
dockerfile: images/hygon/
157+
build_args:
158+
BASE_IMAGE: image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04.1-py3.10
159+
APT_MIRROR: http://archive.ubuntu.com/ubuntu
160+
PIP_INDEX_URL: https://pypi.org/simple
161+
docker_args:
162+
- "--privileged"
163+
- "--network=host"
164+
- "--ipc=host"
165+
- "--device=/dev/kfd"
166+
- "--device=/dev/mkfd"
167+
- "--device=/dev/dri"
168+
- "--group-add=video"
169+
volumes:
170+
- /opt/hyhal:/opt/hyhal:ro
171+
setup: pip install .[dev] --no-build-isolation
172+
jobs:
173+
gpu:
174+
type: unittest
175+
resources:
176+
ngpus: 1
177+
gpu_style: none
178+
memory: 32GB
179+
shm_size: 64g
180+
timeout: 3600
181+
queue_timeout: 600
182+
junit_path: test-results.xml
183+
stages:
184+
- name: test
185+
run: pytest tests/ --devices hygon -n 4 -v --tb=short --junitxml=/workspace/results/test-results.xml
186+
152187
ascend:
153188
runner_label: Ascend
154189
execution_mode: agent_local

CMakeLists.txt

Lines changed: 126 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(PYBIND11_ENABLE_EXTRAS ON)
1111
option(WITH_CPU "Enable CPU backend" OFF)
1212
option(WITH_NVIDIA "Enable CUDA backend" OFF)
1313
option(WITH_ILUVATAR "Enable Iluvatar GPU backend" OFF)
14+
option(WITH_HYGON "Enable Hygon GPU backend" OFF)
1415
option(WITH_METAX "Enable MetaX backend" OFF)
1516
option(WITH_CAMBRICON "Enable Cambricon backend" OFF)
1617
option(WITH_MOORE "Enable Moore backend" OFF)
@@ -29,6 +30,31 @@ option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF)
2930
option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF)
3031
option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF)
3132

33+
set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk")
34+
35+
function(_infiniops_find_hygon_cuda_root out_var dtk_root)
36+
set(_candidates
37+
"${dtk_root}/cuda"
38+
"${dtk_root}/cuda/cuda"
39+
)
40+
41+
file(GLOB _versioned_cuda_dirs LIST_DIRECTORIES true "${dtk_root}/cuda/cuda-*")
42+
if(_versioned_cuda_dirs)
43+
list(SORT _versioned_cuda_dirs)
44+
list(REVERSE _versioned_cuda_dirs)
45+
list(APPEND _candidates ${_versioned_cuda_dirs})
46+
endif()
47+
48+
foreach(_candidate IN LISTS _candidates)
49+
if(EXISTS "${_candidate}/bin/nvcc")
50+
set(${out_var} "${_candidate}" PARENT_SCOPE)
51+
return()
52+
endif()
53+
endforeach()
54+
55+
set(${out_var} "" PARENT_SCOPE)
56+
endfunction()
57+
3258
if(AUTO_DETECT_DEVICES)
3359
message(STATUS "Auto-detecting available devices...")
3460

@@ -48,6 +74,24 @@ if(AUTO_DETECT_DEVICES)
4874
message(STATUS "Auto-detected Iluvatar environment.")
4975
endif()
5076

77+
set(_hygon_detected FALSE)
78+
if(DEFINED ENV{DTK_ROOT} AND NOT "$ENV{DTK_ROOT}" STREQUAL "")
79+
_infiniops_find_hygon_cuda_root(_HYGON_CUDA_DETECT_ROOT "$ENV{DTK_ROOT}")
80+
if(_HYGON_CUDA_DETECT_ROOT)
81+
set(_hygon_detected TRUE)
82+
endif()
83+
else()
84+
_infiniops_find_hygon_cuda_root(_HYGON_CUDA_DETECT_ROOT "${_DEFAULT_HYGON_DTK_ROOT}")
85+
if(_HYGON_CUDA_DETECT_ROOT)
86+
set(_hygon_detected TRUE)
87+
endif()
88+
endif()
89+
90+
if(_hygon_detected)
91+
set(WITH_HYGON ON)
92+
message(STATUS "Auto-detected Hygon environment.")
93+
endif()
94+
5195
if(DEFINED ENV{MACA_PATH})
5296
set(WITH_METAX ON)
5397
message(STATUS "Auto-detected MetaX environment from MACA_PATH")
@@ -172,6 +216,17 @@ if(WITH_TORCH)
172216
OUTPUT_STRIP_TRAILING_WHITESPACE
173217
)
174218

219+
execute_process(
220+
COMMAND ${Python_EXECUTABLE} -c "import pathlib, torch; p = pathlib.Path(torch.__file__).resolve().parent.parent / 'torch.libs'; print(str(p) if p.exists() else '')"
221+
OUTPUT_VARIABLE _torch_private_lib_dir
222+
OUTPUT_STRIP_TRAILING_WHITESPACE
223+
)
224+
225+
set(TORCH_RUNTIME_DIRS ${_torch_lib_dirs})
226+
if(_torch_private_lib_dir)
227+
list(APPEND TORCH_RUNTIME_DIRS ${_torch_private_lib_dir})
228+
endif()
229+
175230
find_library(TORCH_LIB torch HINTS ${_torch_lib_dirs} REQUIRED)
176231
find_library(TORCH_CPU_LIB torch_cpu HINTS ${_torch_lib_dirs} REQUIRED)
177232
find_library(C10_LIB c10 HINTS ${_torch_lib_dirs} REQUIRED)
@@ -221,14 +276,14 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
221276

222277
# Only one CUDA-like GPU backend can be enabled at a time.
223278
set(_gpu_backend_count 0)
224-
foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_METAX WITH_MOORE WITH_ASCEND)
279+
foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_HYGON WITH_METAX WITH_MOORE WITH_ASCEND)
225280
if(${_gpu_backend})
226281
math(EXPR _gpu_backend_count "${_gpu_backend_count} + 1")
227282
endif()
228283
endforeach()
229284

230285
if(_gpu_backend_count GREATER 1)
231-
message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_METAX`, `WITH_MOORE`, and `WITH_ASCEND` are mutually exclusive. Build one GPU backend at a time.")
286+
message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_HYGON`, `WITH_METAX`, `WITH_MOORE`, and `WITH_ASCEND` are mutually exclusive. Build one GPU backend at a time.")
232287
endif()
233288

234289
if(WITH_NVIDIA)
@@ -267,6 +322,70 @@ if(WITH_ILUVATAR)
267322
add_compile_options($<$<COMPILE_LANGUAGE:CUDA>:-x$<SEMICOLON>ivcore>)
268323
endif()
269324

325+
if(WITH_HYGON)
326+
add_compile_definitions(WITH_HYGON=1)
327+
set(DTK_ROOT $ENV{DTK_ROOT})
328+
if(NOT DTK_ROOT)
329+
set(DTK_ROOT "${_DEFAULT_HYGON_DTK_ROOT}")
330+
endif()
331+
if(NOT EXISTS "${DTK_ROOT}")
332+
message(FATAL_ERROR "`WITH_HYGON` is `ON` but `DTK_ROOT` (`${DTK_ROOT}`) does not exist.")
333+
endif()
334+
335+
set(_HYGON_ARCH_DEFAULT "gfx906")
336+
if(DEFINED ENV{HYGON_ARCH} AND NOT "$ENV{HYGON_ARCH}" STREQUAL "")
337+
set(_HYGON_ARCH_DEFAULT "$ENV{HYGON_ARCH}")
338+
else()
339+
find_program(HYGON_ROCMINFO_EXECUTABLE NAMES rocminfo HINTS "${DTK_ROOT}/bin")
340+
if(HYGON_ROCMINFO_EXECUTABLE)
341+
execute_process(
342+
COMMAND ${HYGON_ROCMINFO_EXECUTABLE}
343+
OUTPUT_VARIABLE _HYGON_ROCMINFO_OUTPUT
344+
ERROR_QUIET
345+
OUTPUT_STRIP_TRAILING_WHITESPACE
346+
)
347+
string(REGEX MATCH "gfx[0-9]+" _HYGON_ARCH_AUTO "${_HYGON_ROCMINFO_OUTPUT}")
348+
if(_HYGON_ARCH_AUTO)
349+
set(_HYGON_ARCH_DEFAULT "${_HYGON_ARCH_AUTO}")
350+
endif()
351+
endif()
352+
endif()
353+
354+
set(HYGON_ARCH "${_HYGON_ARCH_DEFAULT}" CACHE STRING "Hygon GPU architecture")
355+
_infiniops_find_hygon_cuda_root(HYGON_CUDA_ROOT "${DTK_ROOT}")
356+
357+
if(NOT HYGON_CUDA_ROOT)
358+
message(FATAL_ERROR "`WITH_HYGON` is `ON` but no DTK `nvcc` was found under `${DTK_ROOT}`. Checked `${DTK_ROOT}/cuda/bin/nvcc`, `${DTK_ROOT}/cuda/cuda/bin/nvcc`, and `${DTK_ROOT}/cuda/cuda-*/bin/nvcc`.")
359+
endif()
360+
361+
set(CMAKE_CUDA_COMPILER "${HYGON_CUDA_ROOT}/bin/nvcc" CACHE FILEPATH "Hygon CUDA compiler (DTK nvcc)")
362+
set(CUDAToolkit_ROOT "${HYGON_CUDA_ROOT}" CACHE PATH "Hygon CUDA toolkit root")
363+
set(CMAKE_CUDA_ARCHITECTURES OFF CACHE STRING "Disable default CUDA arch flags for Hygon" FORCE)
364+
set(CMAKE_CUDA_FLAGS "-std=c++17 -fPIC -arch=${HYGON_ARCH} -Wno-return-type -Wno-error=unused-private-field" CACHE STRING "Hygon CUDA flags")
365+
set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF CACHE BOOL "Disable RDC for Hygon")
366+
367+
# DTK's nvcc wrapper derives its toolkit root from `CUDA_PATH`.
368+
set(ENV{CUDA_PATH} "${HYGON_CUDA_ROOT}")
369+
set(ENV{CUDA_HOME} "${HYGON_CUDA_ROOT}")
370+
371+
# DTK's nvcc wrapper may invoke `nvcc` by name during compiler checks.
372+
set(ENV{PATH} "${HYGON_CUDA_ROOT}/bin:$ENV{PATH}")
373+
374+
# The actual Ninja build runs in fresh processes. Keep a launcher command
375+
# for CUDA-backed Python bindings that need the DTK wrapper environment.
376+
set(_HYGON_RULE_LAUNCH_ENV
377+
"${CMAKE_COMMAND} -E env CUDA_PATH=${HYGON_CUDA_ROOT} CUDA_HOME=${HYGON_CUDA_ROOT} PATH=${HYGON_CUDA_ROOT}/bin:$ENV{PATH}")
378+
379+
include_directories("${DTK_ROOT}/include")
380+
include_directories("${HYGON_CUDA_ROOT}/include")
381+
link_directories("${DTK_ROOT}/lib")
382+
link_directories("${HYGON_CUDA_ROOT}/lib64")
383+
384+
message(STATUS "Hygon: CUDA compiler ${CMAKE_CUDA_COMPILER}, arch ${HYGON_ARCH}, DTK root ${DTK_ROOT}")
385+
enable_language(CUDA)
386+
find_package(CUDAToolkit REQUIRED)
387+
endif()
388+
270389
if(WITH_METAX)
271390
add_compile_definitions(WITH_METAX=1)
272391

@@ -350,14 +469,18 @@ if(WITH_ASCEND)
350469
endif()
351470

352471
# If all other platforms are not enabled, CPU is enabled by default.
353-
if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND)
472+
if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND)
354473
add_compile_definitions(WITH_CPU=1)
355474
endif()
356475

357476
if(WITH_TORCH OR WITH_METAX OR WITH_MOORE)
358477
set(PYBIND11_ENABLE_EXTRAS OFF)
359478
endif()
360479

480+
if(WITH_HYGON AND NOT EXISTS "${DTK_ROOT}/llvm/lib/LLVMgold.so")
481+
set(PYBIND11_ENABLE_EXTRAS OFF)
482+
endif()
483+
361484
add_subdirectory(src)
362485

363486
if(NOT GENERATE_PYTHON_BINDINGS)

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# InfiniOps
22

3-
InfiniOps is a high-performance, cross-platform operator library supporting multiple backends: CPU, Nvidia, MetaX, Iluvatar, Moore, Cambricon, and more.
3+
InfiniOps is a high-performance, cross-platform operator library supporting multiple backends: CPU, Nvidia, MetaX, Iluvatar, Hygon, Moore, Cambricon, and more.
44

55
## Prerequisites
66

@@ -31,12 +31,16 @@ pip install . -C cmake.define.WITH_CPU=ON -C cmake.define.WITH_NVIDIA=ON
3131
| `-DWITH_NVIDIA=[ON\|OFF]` | Compile the Nvidia implementation | OFF |
3232
| `-DWITH_METAX=[ON\|OFF]` | Compile the MetaX implementation | OFF |
3333
| `-DWITH_ILUVATAR=[ON\|OFF]` | Compile the Iluvatar implementation | OFF |
34+
| `-DWITH_HYGON=[ON\|OFF]` | Compile the Hygon implementation | OFF |
3435
| `-DWITH_MOORE=[ON\|OFF]` | Compile the Moore implementation | OFF |
3536
| `-DWITH_CAMBRICON=[ON\|OFF]` | Compile the Cambricon implementation | OFF |
37+
| `-DWITH_ASCEND=[ON\|OFF]` | Compile the Ascend implementation | OFF |
3638
| `-DAUTO_DETECT_DEVICES=[ON\|OFF]` | Auto-detect available platforms | ON |
3739

3840
If no accelerator options are provided and auto-detection finds nothing, `WITH_CPU` is enabled by default.
3941

42+
For Hygon builds, set `DTK_ROOT` to the DTK installation root if it is not installed at `/opt/dtk`. You can override the default DCU arch with `-DHYGON_ARCH=<arch>` when configuring CMake.
43+
4044
## Contributing
4145

4246
See [CONTRIBUTING.md](CONTRIBUTING.md) for code style, commit conventions, PR workflow, development guide, and troubleshooting.

examples/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ foreach(source_file ${EXAMPLE_SOURCES})
99
target_link_libraries(${example_name} PRIVATE infiniops)
1010

1111
target_include_directories(${example_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
12-
12+
1313
get_filename_component(example_dir ${source_file} DIRECTORY)
1414

1515
target_include_directories(${example_name} PRIVATE ${example_dir})
16+
17+
if(WITH_TORCH)
18+
foreach(_torch_dir ${TORCH_RUNTIME_DIRS})
19+
target_link_options(${example_name} PRIVATE "LINKER:-rpath-link,${_torch_dir}")
20+
endforeach()
21+
endif()
1622
endforeach()

src/CMakeLists.txt

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,34 @@ if(WITH_ILUVATAR)
7777
list(APPEND DEVICE_LIST "iluvatar")
7878
endif()
7979

80+
if(WITH_HYGON)
81+
set(HYGON_PATTERNS
82+
"native/cuda/*.cc"
83+
"native/cuda/*.cpp"
84+
"native/cuda/*.cu"
85+
"native/cuda/hygon/*.cc"
86+
"native/cuda/hygon/*.cpp"
87+
"native/cuda/hygon/*.cu"
88+
)
89+
90+
file(GLOB_RECURSE HYGON_SOURCES CONFIGURE_DEPENDS ${HYGON_PATTERNS})
91+
92+
enable_language(CUDA)
93+
94+
target_compile_definitions(infiniops PUBLIC WITH_HYGON=1)
95+
target_sources(infiniops PRIVATE ${HYGON_SOURCES})
96+
97+
find_package(CUDAToolkit REQUIRED)
98+
target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas)
99+
100+
set_target_properties(infiniops PROPERTIES
101+
CUDA_STANDARD 17
102+
CUDA_STANDARD_REQUIRED ON
103+
)
104+
105+
list(APPEND DEVICE_LIST "hygon")
106+
endif()
107+
80108
if(WITH_METAX)
81109
set(METAX_PATTERNS
82110
"native/cuda/*.cc"
@@ -525,7 +553,7 @@ if(GENERATE_PYTHON_BINDINGS)
525553
list(APPEND PYBIND11_COMPILE_SOURCES ${PYBIND11_DISPATCH_SOURCES})
526554

527555
# TODO: There might be a better solution.
528-
if(WITH_NVIDIA OR WITH_ILUVATAR)
556+
if(WITH_NVIDIA OR WITH_ILUVATAR OR WITH_HYGON)
529557
set_source_files_properties(${PYBIND11_COMPILE_SOURCES} PROPERTIES LANGUAGE CUDA)
530558
endif()
531559

@@ -567,6 +595,13 @@ if(GENERATE_PYTHON_BINDINGS)
567595
target_compile_options(ops PRIVATE "-x" "musa")
568596
endif()
569597

598+
if(WITH_HYGON)
599+
set_target_properties(ops PROPERTIES
600+
RULE_LAUNCH_COMPILE "${_HYGON_RULE_LAUNCH_ENV}"
601+
RULE_LAUNCH_LINK "${_HYGON_RULE_LAUNCH_ENV}"
602+
)
603+
endif()
604+
570605
target_include_directories(ops PRIVATE ${PROJECT_SOURCE_DIR})
571606
target_link_libraries(ops PRIVATE infiniops)
572607

@@ -580,9 +615,13 @@ if(GENERATE_PYTHON_BINDINGS)
580615
target_link_libraries(ops PRIVATE
581616
-Wl,--whole-archive no_workspace_kernel -Wl,--no-whole-archive)
582617
endif()
618+
set(_INFINIOPS_INSTALL_RPATH "$ORIGIN")
619+
if(WITH_TORCH)
620+
list(APPEND _INFINIOPS_INSTALL_RPATH ${TORCH_RUNTIME_DIRS})
621+
endif()
583622

584-
set_target_properties(infiniops PROPERTIES INSTALL_RPATH "$ORIGIN")
585-
set_target_properties(ops PROPERTIES INSTALL_RPATH "$ORIGIN")
623+
set_target_properties(infiniops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}")
624+
set_target_properties(ops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}")
586625

587626
install(TARGETS infiniops ops DESTINATION .)
588627

src/native/cuda/hygon/blas.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef INFINI_OPS_HYGON_BLAS_H_
2+
#define INFINI_OPS_HYGON_BLAS_H_
3+
4+
#include <utility>
5+
6+
// clang-format off
7+
#include "cublas_v2.h"
8+
// clang-format on
9+
10+
#include "data_type.h"
11+
#include "native/cuda/blas.h"
12+
#include "native/cuda/hygon/blas_utils.h"
13+
#include "native/cuda/hygon/runtime_.h"
14+
15+
namespace infini::ops {
16+
17+
template <>
18+
struct Blas<Device::Type::kHygon> : public Runtime<Device::Type::kHygon> {
19+
using BlasHandle = cublasHandle_t;
20+
21+
static constexpr auto BLAS_OP_N = CUBLAS_OP_N;
22+
23+
static constexpr auto BLAS_OP_T = CUBLAS_OP_T;
24+
25+
static constexpr auto BLAS_GEMM_DEFAULT = CUBLAS_GEMM_DEFAULT_TENSOR_OP;
26+
27+
static constexpr auto BlasCreate = cublasCreate;
28+
29+
static constexpr auto BlasSetStream = cublasSetStream;
30+
31+
static constexpr auto BlasDestroy = cublasDestroy;
32+
33+
static constexpr auto BlasGemmStridedBatchedEx = [](auto&&... args) {
34+
return cublasGemmStridedBatchedEx(std::forward<decltype(args)>(args)...);
35+
};
36+
};
37+
38+
} // namespace infini::ops
39+
40+
#endif

0 commit comments

Comments
 (0)