Skip to content

Commit e227a90

Browse files
committed
feat(hygon-add): add Hygon backend support for Add
- Add `WITH_HYGON` build support and a Hygon `Add` backend that reuses the shared CUDA implementation. - Detect DTK `nvcc` from the Hygon toolkit layout and auto-detect the GPU arch from `rocminfo`. - Treat Hygon as a CUDA-like backend in shared data type, cast, and kernel helper headers. - Skip the Hygon `gemm` example for now and ignore `build-*` temporary directories. - Verified with `pip install -e .[dev]` and `pytest tests/test_add.py`.
1 parent 61fcdf7 commit e227a90

File tree

11 files changed

+170
-25
lines changed

11 files changed

+170
-25
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Generated files
22
build/
3+
build-*/
4+
cmake-build-*/
35
generated/
46

57
# Prerequisites

CMakeLists.txt

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ set(PYBIND11_ENABLE_EXTRAS ON)
1111
option(WITH_CPU "Enable CPU backend" OFF)
1212
option(WITH_NVIDIA "Enable CUDA backend" OFF)
1313
option(WITH_ILUVATAR "Enable Iluvatar GPU backend" OFF)
14+
option(WITH_HYGON "Enable Hygon GPU backend" OFF)
1415
option(WITH_METAX "Enable MetaX backend" OFF)
1516
option(WITH_CAMBRICON "Enable Cambricon backend" OFF)
1617
option(WITH_MOORE "Enable Moore backend" OFF)
1718

1819
option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF)
1920
option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF)
2021

22+
set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk")
23+
2124
if(AUTO_DETECT_DEVICES)
2225
message(STATUS "Auto-detecting available devices...")
2326

@@ -37,6 +40,13 @@ if(AUTO_DETECT_DEVICES)
3740
message(STATUS "Auto-detected Iluvatar environment.")
3841
endif()
3942

43+
if(DEFINED ENV{DTK_ROOT} OR
44+
EXISTS "${_DEFAULT_HYGON_DTK_ROOT}/cuda/bin/nvcc" OR
45+
EXISTS "${_DEFAULT_HYGON_DTK_ROOT}/cuda/cuda/bin/nvcc")
46+
set(WITH_HYGON ON)
47+
message(STATUS "Auto-detected Hygon environment.")
48+
endif()
49+
4050
if(DEFINED ENV{MACA_PATH})
4151
set(WITH_METAX ON)
4252
message(STATUS "Auto-detected MetaX environment from MACA_PATH")
@@ -77,14 +87,14 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
7787

7888
# Only one CUDA-like GPU backend can be enabled at a time.
7989
set(_gpu_backend_count 0)
80-
foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_METAX WITH_MOORE)
90+
foreach(_gpu_backend WITH_NVIDIA WITH_ILUVATAR WITH_HYGON WITH_METAX WITH_MOORE)
8191
if(${_gpu_backend})
8292
math(EXPR _gpu_backend_count "${_gpu_backend_count} + 1")
8393
endif()
8494
endforeach()
8595

8696
if(_gpu_backend_count GREATER 1)
87-
message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_METAX`, and `WITH_MOORE` are mutually exclusive. Build one GPU backend at a time.")
97+
message(FATAL_ERROR "`WITH_NVIDIA`, `WITH_ILUVATAR`, `WITH_HYGON`, `WITH_METAX`, and `WITH_MOORE` are mutually exclusive. Build one GPU backend at a time.")
8898
endif()
8999

90100
if(WITH_NVIDIA)
@@ -111,6 +121,64 @@ if(WITH_ILUVATAR)
111121
find_package(CUDAToolkit REQUIRED)
112122
endif()
113123

124+
if(WITH_HYGON)
125+
add_compile_definitions(WITH_HYGON=1)
126+
set(DTK_ROOT $ENV{DTK_ROOT})
127+
if(NOT DTK_ROOT)
128+
set(DTK_ROOT "${_DEFAULT_HYGON_DTK_ROOT}")
129+
endif()
130+
if(NOT EXISTS "${DTK_ROOT}")
131+
message(FATAL_ERROR "`WITH_HYGON` is `ON` but `DTK_ROOT` (`${DTK_ROOT}`) does not exist.")
132+
endif()
133+
134+
set(_HYGON_ARCH_DEFAULT "gfx906")
135+
if(DEFINED ENV{HYGON_ARCH} AND NOT "$ENV{HYGON_ARCH}" STREQUAL "")
136+
set(_HYGON_ARCH_DEFAULT "$ENV{HYGON_ARCH}")
137+
else()
138+
find_program(HYGON_ROCMINFO_EXECUTABLE NAMES rocminfo HINTS "${DTK_ROOT}/bin")
139+
if(HYGON_ROCMINFO_EXECUTABLE)
140+
execute_process(
141+
COMMAND ${HYGON_ROCMINFO_EXECUTABLE}
142+
OUTPUT_VARIABLE _HYGON_ROCMINFO_OUTPUT
143+
ERROR_QUIET
144+
OUTPUT_STRIP_TRAILING_WHITESPACE
145+
)
146+
string(REGEX MATCH "gfx[0-9]+" _HYGON_ARCH_AUTO "${_HYGON_ROCMINFO_OUTPUT}")
147+
if(_HYGON_ARCH_AUTO)
148+
set(_HYGON_ARCH_DEFAULT "${_HYGON_ARCH_AUTO}")
149+
endif()
150+
endif()
151+
endif()
152+
153+
set(HYGON_ARCH "${_HYGON_ARCH_DEFAULT}" CACHE STRING "Hygon GPU architecture")
154+
set(HYGON_CUDA_ROOT "${DTK_ROOT}/cuda")
155+
if(EXISTS "${DTK_ROOT}/cuda/cuda/bin/nvcc")
156+
set(HYGON_CUDA_ROOT "${DTK_ROOT}/cuda/cuda")
157+
endif()
158+
159+
if(NOT EXISTS "${HYGON_CUDA_ROOT}/bin/nvcc")
160+
message(FATAL_ERROR "`WITH_HYGON` is `ON` but `${HYGON_CUDA_ROOT}/bin/nvcc` was not found. Checked `${DTK_ROOT}/cuda/bin/nvcc` and `${DTK_ROOT}/cuda/cuda/bin/nvcc`.")
161+
endif()
162+
163+
set(CMAKE_CUDA_COMPILER "${HYGON_CUDA_ROOT}/bin/nvcc" CACHE FILEPATH "Hygon CUDA compiler (DTK nvcc)")
164+
set(CUDAToolkit_ROOT "${HYGON_CUDA_ROOT}" CACHE PATH "Hygon CUDA toolkit root")
165+
set(CMAKE_CUDA_ARCHITECTURES OFF CACHE STRING "Disable default CUDA arch flags for Hygon" FORCE)
166+
set(CMAKE_CUDA_FLAGS "-std=c++17 -fPIC -arch=${HYGON_ARCH} -Wno-return-type -Wno-error=unused-private-field" CACHE STRING "Hygon CUDA flags")
167+
set(CMAKE_CUDA_SEPARABLE_COMPILATION OFF CACHE BOOL "Disable RDC for Hygon")
168+
169+
# DTK's nvcc wrapper may invoke `nvcc` by name during compiler checks.
170+
set(ENV{PATH} "${HYGON_CUDA_ROOT}/bin:$ENV{PATH}")
171+
172+
include_directories("${DTK_ROOT}/include")
173+
include_directories("${HYGON_CUDA_ROOT}/include")
174+
link_directories("${DTK_ROOT}/lib")
175+
link_directories("${HYGON_CUDA_ROOT}/lib64")
176+
177+
message(STATUS "Hygon: CUDA compiler ${CMAKE_CUDA_COMPILER}, arch ${HYGON_ARCH}, DTK root ${DTK_ROOT}")
178+
enable_language(CUDA)
179+
find_package(CUDAToolkit REQUIRED)
180+
endif()
181+
114182
if(WITH_METAX)
115183
add_compile_definitions(WITH_METAX=1)
116184

@@ -179,7 +247,7 @@ if(WITH_CAMBRICON)
179247
endif()
180248

181249
# If all other platforms are not enabled, CPU is enabled by default.
182-
if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_METAX AND NOT WITH_MOORE)
250+
if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE)
183251
add_compile_definitions(WITH_CPU=1)
184252
endif()
185253

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,17 @@ For the `<OPTIONS>`:
3838
|----------------------------------------|------------------------------------|:-:
3939
| `-DWITH_CPU=[ON\|OFF]` | Compile the CPU implementation | n
4040
| `-DWITH_NVIDIA=[ON\|OFF]` | Compile the NVIDIA implementation | n
41+
| `-DWITH_ILUVATAR=[ON\|OFF]` | Compile the Iluvatar implementation| n
42+
| `-DWITH_HYGON=[ON\|OFF]` | Compile the Hygon implementation | n
4143
| `-DWITH_METAX=[ON\|OFF]` | Compile the MetaX implementation | n
4244
| `-DGENERATE_PYTHON_BINDINGS=[ON\|OFF]` | Generate Python bindings | n
4345

4446
*Note: If no accelerator options are provided, `WITH_CPU` is enabled by default.*
4547

48+
For Hygon builds, set `DTK_ROOT` to the DTK installation root if it is not
49+
installed at `/opt/dtk`. You can override the default DCU arch with
50+
`-DHYGON_ARCH=<arch>` when configuring CMake.
51+
4652
## 🚀 Running Examples
4753
After a successful build, the executables are located in the `build/examples` directory.
4854

examples/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@ file(GLOB_RECURSE EXAMPLE_SOURCES CONFIGURE_DEPENDS "*.cc")
22

33
# Iterate through each file and create an executable.
44
foreach(source_file ${EXAMPLE_SOURCES})
5+
if(WITH_HYGON AND source_file MATCHES "/gemm\\.cc$")
6+
continue()
7+
endif()
8+
59
get_filename_component(example_name ${source_file} NAME_WE)
610

711
add_executable(${example_name} ${source_file})

examples/runtime_api.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@
1919
#define DEVICE_MEMCPY_HOST_TO_DEVICE cudaMemcpyHostToDevice
2020
#define DEVICE_MEMCPY_DEVICE_TO_HOST cudaMemcpyDeviceToHost
2121
#define DEFAULT_DEVICE_TYPE Device::Type::kIluvatar
22+
#elif WITH_HYGON
23+
#include <cuda_runtime.h>
24+
#define DEVICE_MALLOC cudaMalloc
25+
#define DEVICE_FREE cudaFree
26+
#define DEVICE_MEMCPY cudaMemcpy
27+
#define DEVICE_MEMSET cudaMemset
28+
#define DEVICE_MEMCPY_HOST_TO_DEVICE cudaMemcpyHostToDevice
29+
#define DEVICE_MEMCPY_DEVICE_TO_HOST cudaMemcpyDeviceToHost
30+
#define DEFAULT_DEVICE_TYPE Device::Type::kHygon
2231
#elif WITH_METAX
2332
#include <mcr/mc_runtime.h>
2433
#define DEVICE_MALLOC mcMalloc

src/CMakeLists.txt

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,34 @@ if(WITH_ILUVATAR)
7777
list(APPEND DEVICE_LIST "iluvatar")
7878
endif()
7979

80+
if(WITH_HYGON)
81+
set(HYGON_PATTERNS
82+
"cuda/*.cc"
83+
"cuda/*.cpp"
84+
"cuda/*.cu"
85+
"hygon/*.cc"
86+
"hygon/*.cpp"
87+
"hygon/*.cu"
88+
)
89+
90+
file(GLOB_RECURSE HYGON_SOURCES CONFIGURE_DEPENDS ${HYGON_PATTERNS})
91+
92+
enable_language(CUDA)
93+
94+
target_compile_definitions(infiniops PUBLIC WITH_HYGON=1)
95+
target_sources(infiniops PRIVATE ${HYGON_SOURCES})
96+
97+
find_package(CUDAToolkit REQUIRED)
98+
target_link_libraries(infiniops PUBLIC CUDA::cudart CUDA::cublas)
99+
100+
set_target_properties(infiniops PROPERTIES
101+
CUDA_STANDARD 17
102+
CUDA_STANDARD_REQUIRED ON
103+
)
104+
105+
list(APPEND DEVICE_LIST "hygon")
106+
endif()
107+
80108
if(WITH_METAX)
81109
set(METAX_PATTERNS
82110
"cuda/*.cc"
@@ -154,7 +182,7 @@ if(GENERATE_PYTHON_BINDINGS)
154182
set(PYBIND11_SOURCES "${PROJECT_SOURCE_DIR}/generated/bindings/ops.cc")
155183

156184
# TODO: There might be a better solution.
157-
if(WITH_NVIDIA OR WITH_ILUVATAR)
185+
if(WITH_NVIDIA OR WITH_ILUVATAR OR WITH_HYGON)
158186
set_source_files_properties(${PYBIND11_SOURCES} PROPERTIES LANGUAGE CUDA)
159187
endif()
160188

src/common/cast.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#ifndef INFINI_OPS_COMMON_CAST_H_
22
#define INFINI_OPS_COMMON_CAST_H_
33

4-
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_METAX) || \
5-
defined(WITH_MOORE)
4+
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_HYGON) || \
5+
defined(WITH_METAX) || defined(WITH_MOORE)
66
#include "common/cuda/cast.h"
77
#else
88
#include "common/cpu/cast.h"

src/common/cuda/cast.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
#ifndef INFINI_OPS_COMMON_CUDA_CAST_H_
22
#define INFINI_OPS_COMMON_CUDA_CAST_H_
33

4-
#ifdef WITH_NVIDIA
5-
#include <cuda_runtime.h>
6-
#elif defined(WITH_ILUVATAR)
7-
#include <cuda_runtime.h>
8-
#elif defined(WITH_METAX)
4+
#ifdef WITH_METAX
95
#include <mcr/mc_runtime.h>
106
#elif defined(WITH_MOORE)
117
#include <musa_runtime.h>

src/common/cuda/kernel_commons.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,8 @@
11
#ifndef INFINI_OPS_COMMON_CUDA_KERNEL_COMMONS_H_
22
#define INFINI_OPS_COMMON_CUDA_KERNEL_COMMONS_H_
33

4-
#ifdef WITH_NVIDIA
4+
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_HYGON)
55
#include <cuda_bf16.h>
6-
#include <cuda_fp16.h>
7-
#include <cuda_runtime.h>
8-
using cuda_bfloat16 = nv_bfloat16;
9-
using cuda_bfloat162 = nv_bfloat162;
10-
#elif defined(WITH_ILUVATAR)
11-
#include <cuda_bf16.h>
12-
#include <cuda_fp16.h>
136
#include <cuda_runtime.h>
147
using cuda_bfloat16 = nv_bfloat16;
158
using cuda_bfloat162 = nv_bfloat162;
@@ -39,7 +32,7 @@ constexpr int CUDA_BLOCK_SIZE_512 = 512;
3932
constexpr int CUDA_BLOCK_SIZE_1024 = 1024;
4033
constexpr int CUDA_BLOCK_SIZE_2048 = 2048;
4134

42-
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR)
35+
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_HYGON)
4336
// Cache `cudaDeviceProp` per device, initialized once at first access.
4437
class DevicePropertyCache {
4538
public:

src/data_type.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
#include <cstring>
66
#include <string>
77

8-
#ifdef WITH_NVIDIA
9-
#include <cuda_bf16.h>
10-
#include <cuda_fp16.h>
11-
#elif defined(WITH_ILUVATAR)
8+
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_HYGON)
129
#include <cuda_bf16.h>
1310
#include <cuda_fp16.h>
1411
#elif defined(WITH_METAX)
@@ -198,7 +195,7 @@ DEFINE_DATA_TYPE_MAPPING(kInt64, std::int64_t)
198195
DEFINE_DATA_TYPE_MAPPING(kFloat32, float)
199196
DEFINE_DATA_TYPE_MAPPING(kFloat64, double)
200197

201-
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR)
198+
#if defined(WITH_NVIDIA) || defined(WITH_ILUVATAR) || defined(WITH_HYGON)
202199
DEFINE_DATA_TYPE_MAPPING(kFloat16, half)
203200
DEFINE_DATA_TYPE_MAPPING(kBFloat16, __nv_bfloat16)
204201
#elif defined(WITH_METAX)

0 commit comments

Comments
 (0)