Skip to content
This repository was archived by the owner on Apr 6, 2026. It is now read-only.

Commit b4accba

Browse files
[XPU]Add support for cutlass-sycl (#200)
Co-authored-by: YangKai0616 <kai.yang@intel.com>
1 parent 85da46f commit b4accba

6 files changed

Lines changed: 93 additions & 8 deletions

File tree

build2cmake/src/config/v2.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ pub enum Dependencies {
217217
Cutlass3_8,
218218
#[serde(rename = "cutlass_3_9")]
219219
Cutlass3_9,
220+
#[serde(rename = "cutlass_sycl_3_9")]
221+
CutlassSycl3_9,
220222
Torch,
221223
}
222224

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# CUTLASS-SYCL dependency.
#
# Prefer an installed CutlassSycl package. When none is found, fetch the
# sources with FetchContent -- either from a local checkout given through the
# CUTLASS_SYCL_SRC_DIR environment/CMake variable, or from the upstream
# repository at the pinned revision -- and configure them for the Intel SYCL
# backend.
#
# Expects FetchContent to be included and the `sycl_flags` /
# `sycl_link_flags` variables to be set before this point.
find_package(CutlassSycl)

if(NOT CutlassSycl_FOUND)
  set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
  set(CUTLASS_ENABLE_BENCHMARKS OFF CACHE BOOL "Disable CUTLASS Benchmarks")

  # Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case.
  set(CUTLASS_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")

  # Use the specified CUTLASS source directory for compilation if CUTLASS_SYCL_SRC_DIR is provided
  if(DEFINED ENV{CUTLASS_SYCL_SRC_DIR})
    set(CUTLASS_SYCL_SRC_DIR $ENV{CUTLASS_SYCL_SRC_DIR})
  endif()

  if(CUTLASS_SYCL_SRC_DIR)
    # IS_ABSOLUTE takes a path string and does not dereference variable names,
    # so the value must be expanded explicitly.
    if(NOT IS_ABSOLUTE "${CUTLASS_SYCL_SRC_DIR}")
      get_filename_component(CUTLASS_SYCL_SRC_DIR "${CUTLASS_SYCL_SRC_DIR}" ABSOLUTE)
    endif()
    message(STATUS "The CUTLASS_SYCL_SRC_DIR is set, using ${CUTLASS_SYCL_SRC_DIR} for compilation")
    FetchContent_Declare(cutlass SOURCE_DIR "${CUTLASS_SYCL_SRC_DIR}")
  else()
    FetchContent_Declare(
      cutlass
      GIT_REPOSITORY https://github.com/intel/cutlass-sycl.git
      GIT_TAG ${CUTLASS_REVISION}
      GIT_PROGRESS TRUE

      # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
      # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
      # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
      GIT_SHALLOW TRUE
    )
  endif()

  # Retarget the SYCL flags from the generic spir64 targets to intel_gpu_pvc,
  # drop the explicit device list, and enable the split-barrier SPIR-V extension.
  message(STATUS "Setting Intel GPU optimization env vars for Cutlass-SYCL")
  string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}")
  string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}")
  # string(APPEND) concatenates text only: this relies on sycl_link_flags
  # already ending in ";" so the new options become separate list items.
  string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier;")
  string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}")

  # Each option is set both as a cache variable (read by the CUTLASS build)
  # and as a compile definition (visible to the sources compiled here).
  set(CUTLASS_ENABLE_SYCL ON CACHE BOOL "Enable SYCL for CUTLASS")
  add_compile_definitions(CUTLASS_ENABLE_SYCL=1)
  set(DPCPP_SYCL_TARGET "intel_gpu_pvc" CACHE STRING "SYCL target for Intel GPU")
  add_compile_definitions(DPCPP_SYCL_TARGET=intel_gpu_pvc)
  set(SYCL_INTEL_TARGET ON CACHE BOOL "Enable SYCL for INTEL")
  add_compile_definitions(SYCL_INTEL_TARGET=1)

  # NOTE(review): set(ENV{...}) only changes the environment of the running
  # CMake (configure) process; it is not propagated to the build or to the
  # program at run time. Confirm these are only needed while configuring.
  set(ENV{SYCL_PROGRAM_COMPILE_OPTIONS} "-ze-opt-large-register-file")
  set(ENV{IGC_VISAOptions} "-perfmodel")
  set(ENV{IGC_VectorAliasBBThreshold} "10000")
  set(ENV{IGC_ExtraOCLOptions} "-cl-intel-256-GRF-per-thread")

  FetchContent_MakeAvailable(cutlass)

  # Presumably both variables are defined by the fetched CUTLASS project --
  # TODO confirm; if empty, these calls add nothing.
  include_directories("${CUTLASS_INCLUDE_DIR}")
  include_directories("${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}")
else()
  message(STATUS "Using system cutlass with version: ${CutlassSycl_VERSION}")
endif()

build2cmake/src/templates/xpu/preamble.cmake

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
cmake_minimum_required(VERSION 3.26)
22

33
# Set Intel SYCL compiler before project() call
4+
find_program(ICX_COMPILER icx)
45
find_program(ICPX_COMPILER icpx)
5-
if(ICPX_COMPILER)
6+
if(ICX_COMPILER AND ICPX_COMPILER)
7+
set(CMAKE_C_COMPILER ${ICX_COMPILER})
68
set(CMAKE_CXX_COMPILER ${ICPX_COMPILER})
7-
message(STATUS "Using Intel SYCL compiler: ${ICPX_COMPILER}")
9+
message(STATUS "Using Intel SYCL C++ compiler: ${ICPX_COMPILER} and C compiler: ${ICX_COMPILER}")
810
else()
9-
message(FATAL_ERROR "Intel SYCL compiler (icpx) not found. Please install Intel oneAPI toolkit.")
11+
message(FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit.")
1012
endif()
1113

1214
project({{ name }})
1315

16+
include(FetchContent)
17+
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
18+
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
19+
1420
include("cmake/utils.cmake")
1521

1622
# Find Python with all necessary components for building extensions
@@ -42,6 +48,6 @@ add_compile_definitions(USE_XPU)
4248

4349
# Set SYCL-specific flags
4450
# Set comprehensive SYCL compilation and linking flags
45-
set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required'")
51+
set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';")
4652
set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
4753
message(STATUS "Configuring for Intel XPU backend using SYCL")

build2cmake/src/torch/cuda.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,9 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu
277277
.wrap_err("Cannot render CUTLASS dependency template")?;
278278
}
279279
Dependencies::Torch => (),
280+
_ => {
281+
eprintln!("Warning: CUDA backend doesn't need/support dependency: {dep:?}");
282+
}
280283
};
281284
write.write_all(b"\n")?;
282285
}

build2cmake/src/torch/xpu.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ fn write_cmake(
147147

148148
render_preamble(env, name, cmake_writer)?;
149149

150-
render_deps(build, cmake_writer)?;
150+
render_deps(env, build, cmake_writer)?;
151151

152152
render_binding(env, torch, name, cmake_writer)?;
153153

@@ -187,7 +187,7 @@ fn render_binding(
187187
Ok(())
188188
}
189189

190-
fn render_deps(build: &Build, write: &mut impl Write) -> Result<()> {
190+
fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Result<()> {
191191
let mut deps = HashSet::new();
192192

193193
for kernel in build.kernels.values() {
@@ -196,10 +196,21 @@ fn render_deps(build: &Build, write: &mut impl Write) -> Result<()> {
196196

197197
for dep in deps {
198198
match dep {
199+
Dependencies::CutlassSycl3_9 => {
200+
env.get_template("xpu/dep-cutlass-sycl.cmake")
201+
.wrap_err("Cannot get CUTLASS-SYCL dependency template")?
202+
.render_to_write(
203+
context! {
204+
version => "3.9-0.3",
205+
},
206+
&mut *write,
207+
)
208+
.wrap_err("Cannot render CUTLASS-SYCL dependency template")?;
209+
}
199210
Dependencies::Torch => (),
200211
_ => {
201-
// XPU doesn't support CUTLASS dependencies yet
202-
eprintln!("Warning: XPU backend doesn't support dependency: {dep:?}");
212+
// XPU supports CUTLASS-SYCL instead of CUTLASS
213+
eprintln!("Warning: XPU backend doesn't need/support dependency: {dep:?}");
203214
}
204215
}
205216
write.write_all(b"\n")?;

lib/deps.nix

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ let
2525
"cutlass_3_9" = [
2626
pkgs.cutlass_3_9
2727
];
28+
"cutlass_sycl_3_9" = [
29+
pkgs.cutlass_sycl_3_9
30+
];
2831
"torch" = [
2932
torch
3033
torch.cxxdev

0 commit comments

Comments
 (0)