Skip to content
This repository was archived by the owner on Apr 6, 2026. It is now read-only.

Commit d1ee9bf

Browse files
authored
Append sycl flags for cutlass in 2025.2.x (#233)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
1 parent 9b791a8 commit d1ee9bf

3 files changed

Lines changed: 15 additions & 6 deletions

File tree

build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
find_package(CutlassSycl)
22

3+
set(CUTLASS_SYCL_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")
34
if (NOT CutlassSycl_FOUND)
45
set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
56
set(CUTLASS_ENABLE_BENCHMARKS OFF CACHE BOOL "Disable CUTLASS Benchmarks")
6-
7-
# Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case.
8-
set(CUTLASS_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")
9-
107
# Use the specified CUTLASS source directory for compilation if CUTLASS_SYCL_SRC_DIR is provided
118
if (DEFINED ENV{CUTLASS_SYCL_SRC_DIR})
129
set(CUTLASS_SYCL_SRC_DIR $ENV{CUTLASS_SYCL_SRC_DIR})
@@ -22,7 +19,7 @@ if (NOT CutlassSycl_FOUND)
2219
FetchContent_Declare(
2320
cutlass
2421
GIT_REPOSITORY https://github.com/intel/cutlass-sycl.git
25-
GIT_TAG ${CUTLASS_REVISION}
22+
GIT_TAG ${CUTLASS_SYCL_REVISION}
2623
GIT_PROGRESS TRUE
2724

2825
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
@@ -54,8 +51,15 @@ else()
5451
include_directories(${CUTLASS_INCLUDE_DIR})
5552
include_directories(${CUTLASS_TOOLS_UTIL_INCLUDE_DIR})
5653
endif(NOT CutlassSycl_FOUND)
54+
if(CUTLASS_SYCL_REVISION MATCHES "^v3\\.9")
55+
add_compile_definitions(OLD_API=1)
56+
endif()
57+
5758
string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}")
5859
string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}")
59-
string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier;")
60+
string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier")
61+
if(CUTLASS_SYCL_REVISION STREQUAL "v0.5")
62+
string(APPEND sycl_link_flags ",+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate")
63+
endif()
6064
string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}")
6165

build2cmake/src/torch/xpu.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu
202202
let version = match dpcpp_version.as_str() {
203203
"2025.0" => "3.9-0.2",
204204
"2025.1" => "3.9-0.3",
205+
"2025.2" => "0.5",
205206
_ => bail!(
206207
"No cutlass_sycl version mapped for DPCPP_VERSION {}",
207208
dpcpp_version

examples/cutlass-gemm/gemm_sycl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,5 +166,9 @@ void cutlass_gemm(torch::Tensor &out, torch::Tensor const &A, torch::Tensor cons
166166
TORCH_CHECK(gemm_op.can_implement(arguments) == cutlass::Status::kSuccess, "Invalid GEMM problem size or configuration");
167167
CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get()));
168168
CUTLASS_CHECK(gemm_op.run());
169+
#if defined(OLD_API)
169170
syclcompat::wait();
171+
#else
172+
compat::wait();
173+
#endif
170174
}

0 commit comments

Comments
 (0)