Skip to content

Commit 7bd4064

Browse files
committed
Build: CUDA include path, nvcc arch string handling, embedding error text
- xmake: ensure CUDA headers are visible for host-compiled operator code.
- nvidia.lua: coerce cuda_arch to a string for xmake 3; align the infiniop/infinirt device-link -gencode flags with the selected arch.
- embedding_nvidia: log cudaGetErrorString on launch failure for easier debugging.
Made-with: Cursor
1 parent f3c4cf5 commit 7bd4064

3 files changed

Lines changed: 26 additions & 4 deletions

File tree

src/infiniop/ops/embedding/nvidia/embedding_nvidia.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "../../../tensor.h"
55
#include "../cuda/embedding_kernel.cuh"
66
#include "embedding_nvidia.cuh"
7+
#include <cstdio>
78
#include <cuda_runtime.h>
89

910
template <typename T, typename IndexType>
@@ -212,9 +213,9 @@ infiniStatus_t Descriptor::calculate(
212213
return INFINI_STATUS_BAD_TENSOR_DTYPE;
213214
}
214215

215-
// Check for kernel launch errors
216216
cudaError_t err = cudaGetLastError();
217217
if (err != cudaSuccess) {
218+
fprintf(stderr, "[infiniop embedding nvidia] cudaGetLastError: %s\n", cudaGetErrorString(err));
218219
return INFINI_STATUS_INTERNAL_ERROR;
219220
}
220221

xmake.lua

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ option_end()
5454

5555
if has_config("nv-gpu") then
5656
add_defines("ENABLE_NVIDIA_API")
57+
local cuda_home = os.getenv("CUDA_HOME") or "/usr/local/cuda"
58+
add_includedirs(path.join(cuda_home, "include"))
5759
includes("xmake/nvidia.lua")
5860
end
5961

xmake/nvidia.lua

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,17 @@ target("infiniop-nvidia")
6363

6464
add_cuflags("-Xcompiler=-Wno-error=deprecated-declarations", "-Xcompiler=-Wno-error=unused-function")
6565

66+
-- xmake 3 may surface option values as non-string types; coerce so we do not fall back to "native".
6667
local arch_opt = get_config("cuda_arch")
67-
if arch_opt and type(arch_opt) == "string" then
68-
for _, arch in ipairs(arch_opt:split(",")) do
68+
local arch_str = arch_opt and tostring(arch_opt):gsub("^%s+", ""):gsub("%s+$", "") or ""
69+
if arch_str ~= "" and arch_str ~= "nil" then
70+
for _, arch in ipairs(arch_str:split(",")) do
6971
arch = arch:trim()
7072
local compute = arch:gsub("sm_", "compute_")
71-
add_cuflags("-gencode=arch=" .. compute .. ",code=" .. arch)
73+
local gencode = "-gencode=arch=" .. compute .. ",code=" .. arch
74+
add_cuflags(gencode)
75+
-- The separate nvcc device-link step must use the same arch; otherwise the linked GPU code defaults to the wrong one.
76+
add_culdflags(gencode)
7277
end
7378
else
7479
add_cugencodes("native")
@@ -101,6 +106,20 @@ target("infinirt-nvidia")
101106
add_cxxflags("-fPIC")
102107
end
103108

109+
local arch_opt_rt = get_config("cuda_arch")
110+
local arch_str_rt = arch_opt_rt and tostring(arch_opt_rt):gsub("^%s+", ""):gsub("%s+$", "") or ""
111+
if arch_str_rt ~= "" and arch_str_rt ~= "nil" then
112+
for _, arch in ipairs(arch_str_rt:split(",")) do
113+
arch = arch:trim()
114+
local compute = arch:gsub("sm_", "compute_")
115+
local gencode = "-gencode=arch=" .. compute .. ",code=" .. arch
116+
add_cuflags(gencode)
117+
add_culdflags(gencode)
118+
end
119+
else
120+
add_cugencodes("native")
121+
end
122+
104123
set_languages("cxx17")
105124
add_files("../src/infinirt/cuda/*.cu")
106125
target_end()

0 commit comments

Comments
 (0)