Skip to content

Commit 5162511

Browse files
TimDettmers and claude committed
fix: Use OBJECT library for SM_120a GEMM kernel compilation
set_source_files_properties with CUDA_ARCHITECTURES "OFF" doesn't properly override the global architectures. Use a separate OBJECT library with its own CUDA_ARCHITECTURES=120a instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 858a8c4 commit 5162511

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

CMakeLists.txt

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -240,12 +240,15 @@ if(BUILD_CUDA)
240240
set(_HAS_SM120 TRUE)
241241
endif()
242242
if(_HAS_SM120)
243-
set(SM120A_FILE csrc/kernels_nvfp4_sm120.cu)
244-
list(APPEND SRC_FILES ${SM120A_FILE})
245-
set_source_files_properties(${SM120A_FILE} PROPERTIES
246-
COMPILE_FLAGS "-gencode=arch=compute_120a,code=sm_120a"
247-
CUDA_ARCHITECTURES "OFF"
243+
# Build as separate OBJECT library with its own CUDA_ARCHITECTURES
244+
# to avoid conflict with the global architecture settings
245+
add_library(nvfp4_sm120a OBJECT csrc/kernels_nvfp4_sm120.cu)
246+
set_target_properties(nvfp4_sm120a PROPERTIES
247+
CUDA_ARCHITECTURES "120a"
248+
POSITION_INDEPENDENT_CODE ON
249+
CUDA_SEPARABLE_COMPILATION OFF
248250
)
251+
target_compile_options(nvfp4_sm120a PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--use_fast_math>)
249252
message(STATUS "NVFP4 SM_120a GEMM kernel enabled")
250253
endif()
251254

@@ -337,6 +340,11 @@ add_library(bitsandbytes SHARED ${SRC_FILES})
337340
target_compile_features(bitsandbytes PUBLIC cxx_std_17)
338341
target_include_directories(bitsandbytes PUBLIC csrc)
339342

343+
# Link NVFP4 SM_120a object library if available
344+
if(TARGET nvfp4_sm120a)
345+
target_sources(bitsandbytes PRIVATE $<TARGET_OBJECTS:nvfp4_sm120a>)
346+
endif()
347+
340348
if (BUILD_CPU)
341349
if (OpenMP_CXX_FOUND)
342350
target_link_libraries(bitsandbytes PRIVATE OpenMP::OpenMP_CXX)

0 commit comments

Comments
 (0)