Skip to content

Commit 9d73c35

Browse files
committed
[ROCm] Add AMD GPU support via ROCm/HIP
This adds an optional AMD GPU build to cuPDLPx through ROCm/HIP, alongside the existing CUDA path. The CUDA build is unchanged when USE_HIP is off. To review: start with internal/cuda_to_hip.h, which routes the CUDA runtime, cuBLAS, cuSPARSE, and CUB symbols used by the solver to their hipRT, hipBLAS, hipSPARSE, and hipCUB equivalents on a HIP build, and includes the standard CUDA headers otherwise. The device sources keep their CUDA spelling and are compiled as HIP. internal/cusparse_compat.h selects the standard hipsparseSpMV path on ROCm, since hipSPARSE does not provide the cusparseSpMVOp variant. CMakeLists.txt gains a USE_HIP option (off by default). When enabled the project is configured with the HIP language, the .cu sources are compiled as HIP, and the targets link hipBLAS, hipSPARSE, and hipCUB instead of the CUDA libraries. GPU architectures are chosen with CMAKE_HIP_ARCHITECTURES, defaulting to gfx90a. On Windows the CLI-only mps_parser.c is excluded from the core library because it relies on strtok_r. The interface test gains a case that runs the GPU solver path with presolve disabled, exercising the hipBLAS and hipSPARSE execution path end to end. Test Plan: Built and ran on an AMD Instinct MI200 (gfx90a) with ROCm 7.2.1: ``` cmake -B build -DUSE_HIP=ON -DCMAKE_HIP_ARCHITECTURES=gfx90a -DCMAKE_PREFIX_PATH=/opt/rocm \ -DCUPDLPX_BUILD_CLI=ON -DCUPDLPX_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release cmake --build build -j$(nproc) ./build/tests/test_interface ``` The interface suite passes, including the GPU solver case (Status: OPTIMAL). The same configuration builds cleanly for gfx1100 (RDNA3) and gfx1201 (RDNA4); the device code objects are identical across the documentation and formatting commits that followed validation. The CUDA build path is unaffected by these changes. This work was authored with the assistance of Claude, an AI assistant by Anthropic.
1 parent 931c94c commit 9d73c35

10 files changed

Lines changed: 361 additions & 27 deletions

File tree

CMakeLists.txt

Lines changed: 94 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,15 @@
33
# -----------------------------------------------------------------------------
44
cmake_minimum_required(VERSION 3.20)
55

6+
# HIP/ROCm support option (must be set before project() to influence language detection)
7+
option(USE_HIP "Build with HIP for AMD GPUs" OFF)
8+
69
# Project config
7-
project(cupdlpx LANGUAGES C CXX CUDA)
10+
if(USE_HIP)
11+
project(cupdlpx LANGUAGES C CXX HIP)
12+
else()
13+
project(cupdlpx LANGUAGES C CXX CUDA)
14+
endif()
815

916
set(CUPDLPX_VERSION_MAJOR 0)
1017
set(CUPDLPX_VERSION_MINOR 2)
@@ -32,8 +39,16 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
3239
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
3340
endif()
3441

35-
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
36-
set(CMAKE_CUDA_ARCHITECTURES 60 70 75 80 86 89 90)
42+
if(USE_HIP)
43+
# HIP architecture configuration
44+
# Default to gfx90a if not specified; can override with -DCMAKE_HIP_ARCHITECTURES=gfx1100, etc.
45+
if(NOT DEFINED CMAKE_HIP_ARCHITECTURES OR CMAKE_HIP_ARCHITECTURES STREQUAL "")
46+
set(CMAKE_HIP_ARCHITECTURES "gfx90a")
47+
endif()
48+
else()
49+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
50+
set(CMAKE_CUDA_ARCHITECTURES 60 70 75 80 86 89 90)
51+
endif()
3752
endif()
3853

3954
# -----------------------------------------------------------------------------
@@ -61,9 +76,14 @@ else()
6176
endif()
6277
endif()
6378

64-
# CUDA standards and RDC
65-
set(CMAKE_CUDA_STANDARD 17)
66-
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
79+
# CUDA/HIP standards and RDC
80+
if(USE_HIP)
81+
set(CMAKE_HIP_STANDARD 17)
82+
set(CMAKE_HIP_STANDARD_REQUIRED ON)
83+
else()
84+
set(CMAKE_CUDA_STANDARD 17)
85+
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
86+
endif()
6787

6888
# -----------------------------------------------------------------------------
6989
# CONTROL OPTIONS
@@ -85,7 +105,16 @@ endif()
85105
# -----------------------------------------------------------------------------
86106
# FIND DEPENDENCIES
87107
# -----------------------------------------------------------------------------
88-
find_package(CUDAToolkit REQUIRED)
108+
if(USE_HIP)
109+
# Find ROCm/HIP libraries
110+
find_package(hip REQUIRED)
111+
find_package(hipblas REQUIRED)
112+
find_package(hipsparse REQUIRED)
113+
find_package(hipcub REQUIRED)
114+
find_package(rocprim REQUIRED)
115+
else()
116+
find_package(CUDAToolkit REQUIRED)
117+
endif()
89118
include(FetchContent)
90119

91120
# 1. ZLIB Configuration
@@ -152,20 +181,40 @@ target_compile_definitions(cupdlpx_compile_flags INTERFACE PSLP_VERSION="${PSLP_
152181
file(GLOB C_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
153182
file(GLOB CU_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cu")
154183
list(REMOVE_ITEM C_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/cli.c")
184+
if(WIN32)
185+
# mps_parser.c is CLI-only; exclude it on Windows where strtok_r is unavailable
186+
list(REMOVE_ITEM C_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/mps_parser.c")
187+
endif()
155188

156189
set(CORE_INCLUDE_DIRS
157190
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
158191
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/internal
159192
)
160193

161-
set(CORE_LINK_LIBS
162-
PUBLIC cupdlpx_compile_flags
163-
PUBLIC CUDA::cudart
164-
PUBLIC CUDA::cublas
165-
PUBLIC CUDA::cusparse
166-
PUBLIC ZLIB::ZLIB
167-
PUBLIC PSLP
168-
)
194+
if(USE_HIP)
195+
set(CORE_LINK_LIBS
196+
PUBLIC cupdlpx_compile_flags
197+
PUBLIC hip::device
198+
PUBLIC roc::hipblas
199+
PUBLIC roc::hipsparse
200+
PUBLIC hip::hipcub
201+
PUBLIC ZLIB::ZLIB
202+
PUBLIC PSLP
203+
)
204+
# Mark .cu files as HIP language
205+
set_source_files_properties(${CU_SOURCES} PROPERTIES LANGUAGE HIP)
206+
# Define USE_HIP for the compat header
207+
add_compile_definitions(USE_HIP)
208+
else()
209+
set(CORE_LINK_LIBS
210+
PUBLIC cupdlpx_compile_flags
211+
PUBLIC CUDA::cudart
212+
PUBLIC CUDA::cublas
213+
PUBLIC CUDA::cusparse
214+
PUBLIC ZLIB::ZLIB
215+
PUBLIC PSLP
216+
)
217+
endif()
169218

170219
# 1. Core STATIC Library
171220
if(CUPDLPX_BUILD_STATIC_LIB)
@@ -174,9 +223,17 @@ if(CUPDLPX_BUILD_STATIC_LIB)
174223
target_link_libraries(cupdlpx_core ${CORE_LINK_LIBS})
175224
set_target_properties(cupdlpx_core PROPERTIES
176225
POSITION_INDEPENDENT_CODE ON
177-
CUDA_SEPARABLE_COMPILATION ON
178-
CUDA_RESOLVE_DEVICE_SYMBOLS ON
179226
)
227+
if(USE_HIP)
228+
set_target_properties(cupdlpx_core PROPERTIES
229+
HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}"
230+
)
231+
else()
232+
set_target_properties(cupdlpx_core PROPERTIES
233+
CUDA_SEPARABLE_COMPILATION ON
234+
CUDA_RESOLVE_DEVICE_SYMBOLS ON
235+
)
236+
endif()
180237
endif()
181238

182239
# 2. Shared Library
@@ -187,9 +244,17 @@ if(CUPDLPX_BUILD_SHARED_LIB)
187244
set_target_properties(cupdlpx_shared PROPERTIES
188245
OUTPUT_NAME "cupdlpx"
189246
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
247+
)
248+
if(USE_HIP)
249+
set_target_properties(cupdlpx_shared PROPERTIES
250+
HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}"
251+
)
252+
else()
253+
set_target_properties(cupdlpx_shared PROPERTIES
190254
CUDA_SEPARABLE_COMPILATION ON
191255
CUDA_RESOLVE_DEVICE_SYMBOLS ON
192-
)
256+
)
257+
endif()
193258
endif()
194259

195260
# 3. CLI Executable
@@ -204,8 +269,12 @@ if(CUPDLPX_BUILD_CLI)
204269
set_target_properties(cupdlpx_cli PROPERTIES
205270
OUTPUT_NAME "cupdlpx"
206271
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
207-
CUDA_RESOLVE_DEVICE_SYMBOLS ON
208272
)
273+
if(NOT USE_HIP)
274+
set_target_properties(cupdlpx_cli PROPERTIES
275+
CUDA_RESOLVE_DEVICE_SYMBOLS ON
276+
)
277+
endif()
209278
endif()
210279

211280
# 4. Tests
@@ -217,14 +286,18 @@ if(CUPDLPX_BUILD_TESTS)
217286
enable_testing()
218287
file(GLOB TEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test/*.c" "${CMAKE_CURRENT_SOURCE_DIR}/test/*.cu")
219288
foreach(TEST_SRC ${TEST_SOURCES})
220-
get_filename_component(TEST_NAME ${TEST_SRC} NAME_WE)
289+
get_filename_component(TEST_NAME ${TEST_SRC} NAME_WE)
221290
add_executable(${TEST_NAME} ${TEST_SRC})
222291
target_link_libraries(${TEST_NAME} PRIVATE cupdlpx_core)
223292
target_include_directories(${TEST_NAME} PRIVATE include internal)
224293
set_target_properties(${TEST_NAME} PROPERTIES
225294
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tests"
226-
CUDA_RESOLVE_DEVICE_SYMBOLS ON
227295
)
296+
if(NOT USE_HIP)
297+
set_target_properties(${TEST_NAME} PROPERTIES
298+
CUDA_RESOLVE_DEVICE_SYMBOLS ON
299+
)
300+
endif()
228301
add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
229302
endforeach()
230303
endif()

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@ Our work is presented in two papers:
2626
## Installation
2727

2828
### Requirements
29-
* **GPU:** NVIDIA GPU with CUDA 12.4+.
30-
* **Build Tools:** CMake (≥ 3.20), GCC, NVCC.
29+
* **GPU:** NVIDIA GPU with CUDA 12.4+, or AMD GPU with ROCm 7.2+.
30+
* **Build Tools:** CMake (≥ 3.20), GCC, and NVCC (CUDA) or hipcc (ROCm).
3131

3232
> **SpMV backend** is selected automatically at compile time based on cuSPARSE version:
3333
> - `cusparseSpMV` — CUDA 12.4 – 13.1 (cuSPARSE < 12.7.3)
3434
> - `cusparseSpMVOp` — CUDA 13.1 Update 1+ (cuSPARSE ≥ 12.7.3)
35+
>
36+
> On AMD GPUs the solver uses the `hipsparseSpMV` backend via hipSPARSE.
3537
3638
### Build from Source
3739
Clone the repository and compile the project using CMake.
@@ -43,6 +45,21 @@ cmake --build build --clean-first
4345
```
4446
This will create the solver binary at `./build/cupdlpx`.
4547

48+
#### Building for AMD GPUs (ROCm/HIP)
49+
To target AMD GPUs, configure with `-DUSE_HIP=ON` and select the GPU
50+
architecture with `-DCMAKE_HIP_ARCHITECTURES`. The CUDA sources are compiled
51+
as HIP and the cuBLAS/cuSPARSE/CUB calls are mapped to hipBLAS/hipSPARSE/hipCUB.
52+
```bash
53+
cmake -B build -DUSE_HIP=ON -DCMAKE_HIP_ARCHITECTURES=gfx90a -DCMAKE_PREFIX_PATH=/opt/rocm
54+
cmake --build build --clean-first
55+
```
56+
Set `CMAKE_HIP_ARCHITECTURES` to match your GPU (for example `gfx90a` for
57+
MI200, `gfx1100` for RDNA3 desktop, or `gfx1201` for RDNA4). If the ROCm
58+
install is not on CMake's default search path, point `-DCMAKE_PREFIX_PATH` at
59+
it (e.g. `/opt/rocm`) so `find_package` can locate hip, hipBLAS, hipSPARSE,
60+
and hipCUB. The resulting `./build/cupdlpx` binary is used exactly as in the
61+
CUDA build.
62+
4663
#### Verifying the Installation
4764
Run a small test problem to confirm that the solver was built correctly.
4865
```bash

0 commit comments

Comments
 (0)