diff --git a/build2cmake/src/templates/cpu/preamble.cmake b/build2cmake/src/templates/cpu/preamble.cmake
deleted file mode 100644
index 2b118987..00000000
--- a/build2cmake/src/templates/cpu/preamble.cmake
+++ /dev/null
@@ -1,50 +0,0 @@
-cmake_minimum_required(VERSION 3.26)
-project({{name}} LANGUAGES CXX)
-
-set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "Minimum macOS deployment version")
-
-install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
-
-include(FetchContent)
-file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
-message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
-
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/kernel.cmake)
-
-if(DEFINED Python3_EXECUTABLE)
-  # Allow passing through the interpreter (e.g. from setup.py).
-  find_package(Python3 COMPONENTS Development Development.SABIModule Interpreter)
-  if (NOT Python3_FOUND)
-    message(FATAL_ERROR "Unable to find python matching: ${EXECUTABLE}.")
-  endif()
-else()
-  find_package(Python3 REQUIRED COMPONENTS Development Development.SABIModule Interpreter)
-endif()
-
-append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
-
-find_package(Torch REQUIRED)
-
-run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
-
-{% if torch_minver %}
-if (TORCH_VERSION VERSION_LESS {{ torch_minver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too old. "
-    "Minimum required version is {{ torch_minver }}.")
-endif()
-{% endif %}
-
-{% if torch_maxver %}
-if (TORCH_VERSION VERSION_GREATER {{ torch_maxver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too new. "
-    "Maximum supported version is {{ torch_maxver }}.")
-endif()
-{% endif %}
-
-set(GPU_LANG "CPU")
-
-add_compile_definitions(CPU_KERNEL)
-
-# Initialize SRC list for kernel and binding sources
-set(SRC "")
diff --git a/build2cmake/src/templates/get_gpu_lang.cmake b/build2cmake/src/templates/get_gpu_lang.cmake
new file mode 100644
index 00000000..004f219c
--- /dev/null
+++ b/build2cmake/src/templates/get_gpu_lang.cmake
@@ -0,0 +1,17 @@
+# get_gpu_lang(OUT): ask the Torch installation visible to ${Python3_EXECUTABLE}
+# which GPU language to build for and store the stripped stdout of
+# get_gpu_lang.py in the caller's variable OUT; fails the configure on error.
+function(get_gpu_lang OUT)
+    execute_process(
+        # get_gpu_lang.py is emitted next to this file; resolve it relative to it.
+        COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/get_gpu_lang.py"
+        OUTPUT_VARIABLE PYTHON_OUT
+        RESULT_VARIABLE PYTHON_ERROR_CODE
+        ERROR_VARIABLE PYTHON_STDERR
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    if(NOT PYTHON_ERROR_CODE EQUAL 0)
+        message(FATAL_ERROR "Cannot detect GPU language: ${PYTHON_STDERR}")
+    endif()
+    set(${OUT} "${PYTHON_OUT}" PARENT_SCOPE)
+endfunction()
diff --git a/build2cmake/src/templates/get_gpu_lang.py b/build2cmake/src/templates/get_gpu_lang.py
new file mode 100644
index 00000000..1eedff7e
--- /dev/null
+++ b/build2cmake/src/templates/get_gpu_lang.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+# Print the GPU language of the importable Torch build: CUDA, HIP, METAL, SYCL or CPU.
+import sys
+
+try:
+    import torch
+except ImportError:
+    print("Torch is required for configuring a kernel build.", file=sys.stderr)
+    sys.exit(1)
+
+if torch.version.cuda is not None:  # checks run in order; the first match wins
+    print("CUDA")
+elif torch.version.hip is not None:
+    print("HIP")
+elif torch.backends.mps.is_available():
+    print("METAL")
+elif hasattr(torch.version, "xpu") and torch.version.xpu is not None:  # guard: torch.version may not define xpu
+    print("SYCL")
+else:
+    print("CPU")
diff --git a/build2cmake/src/templates/metal/preamble.cmake b/build2cmake/src/templates/metal/preamble.cmake
deleted file mode 100644
index 8f67ca0f..00000000
--- a/build2cmake/src/templates/metal/preamble.cmake
+++ /dev/null
@@ -1,54 +0,0 @@
-cmake_minimum_required(VERSION 3.26)
-project({{name}} LANGUAGES CXX C OBJC OBJCXX)
-
-set(CMAKE_OSX_DEPLOYMENT_TARGET "26.0" CACHE STRING "Minimum macOS deployment version")
-
-install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
-
-include(FetchContent)
-file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
-message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
-
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/kernel.cmake)
-
-if(DEFINED Python3_EXECUTABLE)
-  # Allow passing through the interpreter (e.g. from setup.py).
-  find_package(Python3 COMPONENTS Development Development.SABIModule Interpreter)
-  if (NOT Python3_FOUND)
-    message(FATAL_ERROR "Unable to find python matching: ${EXECUTABLE}.")
-  endif()
-else()
-  find_package(Python3 REQUIRED COMPONENTS Development Development.SABIModule Interpreter)
-endif()
-
-append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
-
-find_package(Torch REQUIRED)
-
-run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
-
-{% if torch_minver %}
-if (TORCH_VERSION VERSION_LESS {{ torch_minver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too old. "
-    "Minimum required version is {{ torch_minver }}.")
-endif()
-{% endif %}
-
-{% if torch_maxver %}
-if (TORCH_VERSION VERSION_GREATER {{ torch_maxver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too new. "
-    "Maximum supported version is {{ torch_maxver }}.")
-endif()
-{% endif %}
-
-set(GPU_LANG "METAL")
-
-add_compile_definitions(METAL_KERNEL)
-
-# Initialize SRC list for kernel and binding sources
-set(SRC "")
-
-# Initialize lists for Metal shader sources and their include directories
-set(ALL_METAL_SOURCES)
-set(METAL_INCLUDE_DIRS)
diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/preamble.cmake
similarity index 62%
rename from build2cmake/src/templates/cuda/preamble.cmake
rename to build2cmake/src/templates/preamble.cmake
index 187ea905..a1590af0 100644
--- a/build2cmake/src/templates/cuda/preamble.cmake
+++ b/build2cmake/src/templates/preamble.cmake
@@ -1,8 +1,6 @@
 cmake_minimum_required(VERSION 3.26)
 project({{name}} LANGUAGES CXX)
 
-set(TARGET_DEVICE "cuda" CACHE STRING "Target device backend for kernel")
-
 install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
 
 include(FetchContent)
@@ -13,6 +11,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/kernel.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/cmake/get_gpu_lang.cmake)
 
 if(DEFINED Python3_EXECUTABLE)
   # Allow passing through the interpreter (e.g. from setup.py).
@@ -30,6 +29,10 @@ find_package(Torch REQUIRED)
 
 run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
 
+get_gpu_lang(DETECTED_GPU_LANG)
+set(GPU_LANG "${DETECTED_GPU_LANG}" CACHE STRING "GPU language")
+message(STATUS "Using GPU language: ${GPU_LANG}")
+
 {% if torch_minver %}
 if (TORCH_VERSION VERSION_LESS {{ torch_minver }})
   message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too old. "
@@ -44,11 +47,6 @@ if (TORCH_VERSION VERSION_GREATER {{ torch_maxver }})
 endif()
 {% endif %}
 
-if (NOT TARGET_DEVICE STREQUAL "cuda" AND
-    NOT TARGET_DEVICE STREQUAL "rocm")
-    return()
-endif()
-
 option(BUILD_ALL_SUPPORTED_ARCHS "Build all supported architectures" off)
 
 if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
@@ -61,8 +59,12 @@ else()
   set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0+PTX")
 endif()
 
-if (NOT HIP_FOUND AND CUDA_FOUND)
-  set(GPU_LANG "CUDA")
+
+# Basic checks for each GPU language.
+if(GPU_LANG STREQUAL "CUDA")
+  if(NOT CUDA_FOUND)
+    message(FATAL_ERROR "GPU language is set to CUDA, but cannot find CUDA toolkit")
+  endif()
 
   {% if cuda_minver %}
     if (CUDA_VERSION VERSION_LESS {{ cuda_minver }})
@@ -78,18 +80,42 @@ if (NOT HIP_FOUND AND CUDA_FOUND)
     endif()
   {% endif %}
 
-elseif(HIP_FOUND)
-  set(GPU_LANG "HIP")
+  # TODO: deprecate one of these settings.
+  add_compile_definitions(USE_CUDA=1)
+  add_compile_definitions(CUDA_KERNEL)
+elseif(GPU_LANG STREQUAL "HIP")
+  if(NOT HIP_FOUND)
+    message(FATAL_ERROR "GPU language is set to HIP, but cannot find ROCm toolkit")
+  endif()
 
   # Importing torch recognizes and sets up some HIP/ROCm configuration but does
   # not let cmake recognize .hip files. In order to get cmake to understand the
   # .hip extension automatically, HIP must be enabled explicitly.
   enable_language(HIP)
+
+  # TODO: deprecate one of these settings.
+  add_compile_definitions(USE_ROCM=1)
+  add_compile_definitions(ROCM_KERNEL)
+elseif(GPU_LANG STREQUAL "CPU")
+  add_compile_definitions(CPU_KERNEL)
+  set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "Minimum macOS deployment version")
+elseif(GPU_LANG STREQUAL "METAL")
+  set(CMAKE_OSX_DEPLOYMENT_TARGET "26.0" CACHE STRING "Minimum macOS deployment version")
+  enable_language(C OBJC OBJCXX)
+
+  add_compile_definitions(METAL_KERNEL)
+
+  # Initialize lists for Metal shader sources and their include directories
+  set(ALL_METAL_SOURCES)
+  set(METAL_INCLUDE_DIRS)
+elseif(GPU_LANG STREQUAL "SYCL")
+  add_compile_definitions(XPU_KERNEL)
+  add_compile_definitions(USE_XPU)
 else()
-  message(FATAL_ERROR "Can't find CUDA or HIP installation.")
+  message(FATAL_ERROR "Unsupported GPU language: ${GPU_LANG}")
 endif()
 
-
+# CUDA build options.
 if(GPU_LANG STREQUAL "CUDA")
   # This clears out -gencode arguments from `CMAKE_CUDA_FLAGS`, which we need
   # to set our own set of capabilities.
@@ -116,13 +142,40 @@ if(GPU_LANG STREQUAL "CUDA")
     list(APPEND GPU_FLAGS "--threads=${NVCC_THREADS}")
   endif()
 
-  add_compile_definitions(CUDA_KERNEL)
 elseif(GPU_LANG STREQUAL "HIP")
   override_gpu_arches(GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS})
   set(ROCM_ARCHS ${GPU_ARCHES})
   message(STATUS "ROCM supported target architectures: ${ROCM_ARCHS}")
+elseif(GPU_LANG STREQUAL "SYCL")
+  find_program(ICX_COMPILER icx)
+  find_program(ICPX_COMPILER icpx)
 
-  add_compile_definitions(ROCM_KERNEL)
+  if(NOT ICX_COMPILER OR NOT ICPX_COMPILER) # both are used below, so either one missing is fatal
+    message(FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit.")
+  endif()
+
+  execute_process(
+    COMMAND ${ICPX_COMPILER} --version
+    OUTPUT_VARIABLE ICPX_VERSION_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  string(REGEX MATCH "[0-9]+\\.[0-9]+" DPCPP_VERSION "${ICPX_VERSION_OUTPUT}")
+  set(DPCPP_VERSION "${DPCPP_VERSION}" CACHE STRING "DPCPP major.minor version")
+  set(CMAKE_C_COMPILER ${ICX_COMPILER})
+
+  # On Windows, use icx (MSVC-compatible) for C++ to work with Ninja generator
+  # On Linux, use icpx (GNU-compatible) for C++
+  if(WIN32)
+    set(CMAKE_CXX_COMPILER ${ICX_COMPILER})
+    message(STATUS "Using Intel SYCL C++ compiler: ${ICX_COMPILER} and C compiler: ${ICX_COMPILER} Version: ${DPCPP_VERSION} (Windows MSVC-compatible mode)")
+  else()
+    set(CMAKE_CXX_COMPILER ${ICPX_COMPILER})
+    message(STATUS "Using Intel SYCL C++ compiler: ${ICPX_COMPILER} and C compiler: ${ICX_COMPILER} Version: ${DPCPP_VERSION}")
+  endif()
+
+  set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';")
+  set(GPU_FLAGS "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
+  set(GPU_ARCHES "")
 else()
   override_gpu_arches(GPU_ARCHES
     ${GPU_LANG}
@@ -133,17 +186,11 @@ endif()
 set(SRC "")
 
 message(STATUS "Rendered for platform {{ platform }}")
+
 {% if platform == 'windows' %}
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake)
 
-if(GPU_LANG STREQUAL "CUDA")
-  add_compile_definitions(USE_CUDA=1)
-elseif(GPU STREQUAL "HIP")
-  add_compile_definitions(USE_ROCM=1)
-endif()
-
 # Generate standardized build name
-run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
 cmake_host_system_information(RESULT HOST_ARCH QUERY OS_PLATFORM)
 
 set(SYSTEM_STRING "${HOST_ARCH}-windows")
@@ -153,5 +200,9 @@ if(GPU_LANG STREQUAL "CUDA")
 elseif(GPU_LANG STREQUAL "HIP")
   run_python(ROCM_VERSION "import torch.version; print(torch.version.hip.split('.')[0] + '.' + torch.version.hip.split('.')[1])" "Failed to get ROCm version")
   generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" "rocm" "${ROCM_VERSION}" "${SYSTEM_STRING}")
+elseif(GPU_LANG STREQUAL "SYCL")
+  generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" "xpu" "${DPCPP_VERSION}")
+else()
+  generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" "cpu" "${SYSTEM_STRING}")
 endif()
 {% endif %}
diff --git a/build2cmake/src/templates/xpu/preamble.cmake b/build2cmake/src/templates/xpu/preamble.cmake
deleted file mode 100644
index 1687264a..00000000
--- a/build2cmake/src/templates/xpu/preamble.cmake
+++ /dev/null
@@ -1,95 +0,0 @@
-cmake_minimum_required(VERSION 3.26)
-
-# Set Intel SYCL compiler before project() call
-find_program(ICX_COMPILER icx)
-find_program(ICPX_COMPILER icpx)
-if(ICX_COMPILER AND ICPX_COMPILER)
-    execute_process(
-      COMMAND ${ICPX_COMPILER} --version
-      OUTPUT_VARIABLE ICPX_VERSION_OUTPUT
-      OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    string(REGEX MATCH "[0-9]+\\.[0-9]+" DPCPP_VERSION "${ICPX_VERSION_OUTPUT}")
-    set(DPCPP_VERSION "${DPCPP_VERSION}" CACHE STRING "DPCPP major.minor version")
-    set(CMAKE_C_COMPILER ${ICX_COMPILER})
-
-    # On Windows, use icx (MSVC-compatible) for C++ to work with Ninja generator
-    # On Linux, use icpx (GNU-compatible) for C++
-    if(WIN32)
-        set(CMAKE_CXX_COMPILER ${ICX_COMPILER})
-        message(STATUS "Using Intel SYCL C++ compiler: ${ICX_COMPILER} and C compiler: ${ICX_COMPILER} Version: ${DPCPP_VERSION} (Windows MSVC-compatible mode)")
-    else()
-        set(CMAKE_CXX_COMPILER ${ICPX_COMPILER})
-        message(STATUS "Using Intel SYCL C++ compiler: ${ICPX_COMPILER} and C compiler: ${ICX_COMPILER} Version: ${DPCPP_VERSION}")
-    endif()
-else()
-    message(FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit.")
-endif()
-
-project({{ name }})
-
-include(FetchContent)
-file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
-message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
-
-include("cmake/utils.cmake")
-include("cmake/kernel.cmake")
-
-# Find Python with all necessary components for building extensions
-find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module Development.SABIModule)
-
-append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
-
-find_package(Torch REQUIRED)
-
-# Intel XPU backend detection and setup
-if(NOT TORCH_VERSION)
-  run_python(TORCH_VERSION "import torch; print(torch.__version__.split('+')[0])" "Failed to get Torch version")
-endif()
-
-{% if torch_minver %}
-if (TORCH_VERSION VERSION_LESS {{ torch_minver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too old. "
-    "Minimum required version is {{ torch_minver }}.")
-endif()
-{% endif %}
-
-{% if torch_maxver %}
-if (TORCH_VERSION VERSION_GREATER {{ torch_maxver }})
-  message(FATAL_ERROR "Torch version ${TORCH_VERSION} is too new. "
-    "Maximum supported version is {{ torch_maxver }}.")
-endif()
-{% endif %}
-
-# Check for Intel XPU support in PyTorch
-run_python(XPU_AVAILABLE
-  "import torch; print('true' if hasattr(torch, 'xpu') else 'false')"
-  "Failed to check XPU availability")
-
-if(NOT XPU_AVAILABLE STREQUAL "true")
-  message(WARNING "Intel XPU is not available in this PyTorch installation. XPU kernels will be skipped.")
-  return()
-endif()
-
-# Set up XPU compilation flags
-set(GPU_LANG "SYCL")
-add_compile_definitions(XPU_KERNEL)
-add_compile_definitions(USE_XPU)
-
-# Set SYCL-specific flags
-# Set comprehensive SYCL compilation and linking flags
-set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';")
-set(GPU_FLAGS "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
-set(GPU_ARCHES "")
-message(STATUS "Configuring for Intel XPU backend using SYCL")
-
-{% if platform == 'windows' %}
-# Include Windows-specific functions for local_install and kernels_install targets
-include(${CMAKE_CURRENT_LIST_DIR}/cmake/windows.cmake)
-
-# Generate build variant name for XPU (e.g., torch291-xpu-x86_64-windows)
-generate_build_name(BUILD_VARIANT_NAME "${TORCH_VERSION}" "xpu" "${DPCPP_VERSION}")
-{% endif %}
-
-# Initialize SRC list for kernel and binding sources
-set(SRC "")
diff --git a/build2cmake/src/torch/common.rs b/build2cmake/src/torch/common.rs
index feb37825..64d298ff 100644
--- a/build2cmake/src/torch/common.rs
+++ b/build2cmake/src/torch/common.rs
@@ -7,6 +7,7 @@ use minijinja::{context, Environment};
 
 use crate::config::{Backend, General, Torch};
 use crate::metadata::Metadata;
+use crate::version::Version;
 use crate::FileSet;
 
 static REGISTRATION_H: &str = include_str!("../templates/registration.h");
@@ -16,6 +17,8 @@ static WINDOWS_UTILS: &str = include_str!("../templates/windows.cmake");
 static HIPIFY: &str = include_str!("../templates/cuda/hipify.py");
 static COMPILE_METAL_CMAKE: &str = include_str!("../templates/metal/compile-metal.cmake");
 static METALLIB_TO_HEADER_PY: &str = include_str!("../templates/metal/metallib_to_header.py");
+static GET_GPU_LANG: &str = include_str!("../templates/get_gpu_lang.cmake");
+static GET_GPU_LANG_PY: &str = include_str!("../templates/get_gpu_lang.py");
 
 pub fn write_setup_py(
     env: &Environment,
@@ -187,6 +190,8 @@ pub fn write_cmake_helpers(file_set: &mut FileSet) {
         "metallib_to_header.py",
         METALLIB_TO_HEADER_PY.as_bytes(),
     );
+    write_cmake_file(file_set, "get_gpu_lang.cmake", GET_GPU_LANG.as_bytes());
+    write_cmake_file(file_set, "get_gpu_lang.py", GET_GPU_LANG_PY.as_bytes());
 }
 
 pub fn render_extension(
@@ -211,3 +216,32 @@ pub fn render_extension(
 
     Ok(())
 }
+
+/// Renders `preamble.cmake` into `write` with the project name, the optional CUDA and
+/// Torch version bounds and `std::env::consts::OS` as `platform`, then appends "\n".
+pub fn render_preamble(
+    env: &Environment,
+    name: &str,
+    cuda_minver: Option<&Version>,
+    cuda_maxver: Option<&Version>,
+    torch_minver: Option<&Version>,
+    torch_maxver: Option<&Version>,
+    write: &mut impl Write,
+) -> Result<()> {
+    env.get_template("preamble.cmake")
+        .wrap_err("Cannot get CMake prelude template")?
+        .render_to_write(
+            context! {
+                name => name,
+                cuda_minver => cuda_minver.map(|v| v.to_string()),
+                cuda_maxver => cuda_maxver.map(|v| v.to_string()),
+                torch_minver => torch_minver.map(|v| v.to_string()),
+                torch_maxver => torch_maxver.map(|v| v.to_string()),
+                platform => std::env::consts::OS
+            },
+            &mut *write,
+        )
+        .wrap_err("Cannot render CMake prelude template")?;
+    write.write_all(b"\n")?;
+    Ok(())
+}
diff --git a/build2cmake/src/torch/cpu.rs b/build2cmake/src/torch/cpu.rs
index f73ccfc9..a4cc6cf3 100644
--- a/build2cmake/src/torch/cpu.rs
+++ b/build2cmake/src/torch/cpu.rs
@@ -1,17 +1,16 @@
-use std::{io::Write, path::PathBuf};
+use std::path::PathBuf;
 
-use eyre::{bail, Context, Result};
-use minijinja::{context, Environment};
+use eyre::{bail, Result};
+use minijinja::Environment;
 
 use crate::config::{Backend, Build, Torch};
 use crate::fileset::FileSet;
 use crate::torch::common::{
-    render_binding, render_extension, write_cmake_helpers, write_metadata, write_ops_py,
-    write_pyproject_toml, write_setup_py, write_torch_registration_macros,
+    render_binding, render_extension, render_preamble, write_cmake_helpers, write_metadata,
+    write_ops_py, write_pyproject_toml, write_setup_py, write_torch_registration_macros,
 };
 use crate::torch::kernel::render_kernel_components;
 use crate::torch::kernel_ops_identifier;
-use crate::version::Version;
 
 pub fn write_torch_ext_cpu(
     env: &Environment,
@@ -71,6 +70,8 @@ fn write_cmake(
     render_preamble(
         env,
         name,
+        None,
+        None,
         torch.minver.as_ref(),
         torch.maxver.as_ref(),
         cmake_writer,
@@ -87,27 +88,3 @@ fn write_cmake(
 
     Ok(())
 }
-
-fn render_preamble(
-    env: &Environment,
-    name: &str,
-    torch_minver: Option<&Version>,
-    torch_maxver: Option<&Version>,
-    write: &mut impl Write,
-) -> Result<()> {
-    env.get_template("cpu/preamble.cmake")
-        .wrap_err("Cannot get CMake prelude template")?
-        .render_to_write(
-            context! {
-                name => name,
-                torch_minver => torch_minver.map(|v| v.to_string()),
-                torch_maxver => torch_maxver.map(|v| v.to_string()),
-            },
-            &mut *write,
-        )
-        .wrap_err("Cannot render CMake prelude template")?;
-
-    write.write_all(b"\n")?;
-
-    Ok(())
-}
diff --git a/build2cmake/src/torch/cuda.rs b/build2cmake/src/torch/cuda.rs
index 54093da1..a94fe7bc 100644
--- a/build2cmake/src/torch/cuda.rs
+++ b/build2cmake/src/torch/cuda.rs
@@ -1,19 +1,16 @@
-use std::env;
-use std::io::Write;
 use std::path::PathBuf;
 
-use eyre::{bail, Context, Result};
-use minijinja::{context, Environment};
+use eyre::{bail, Result};
+use minijinja::Environment;
 
 use crate::config::{Backend, Build, Torch};
 use crate::torch::common::{
-    render_binding, render_extension, write_cmake_helpers, write_metadata, write_ops_py,
-    write_pyproject_toml, write_setup_py, write_torch_registration_macros,
+    render_binding, render_extension, render_preamble, write_cmake_helpers, write_metadata,
+    write_ops_py, write_pyproject_toml, write_setup_py, write_torch_registration_macros,
 };
 use crate::torch::deps::render_deps;
 use crate::torch::kernel::render_kernel_components;
 use crate::torch::kernel_ops_identifier;
-use crate::version::Version;
 use crate::FileSet;
 
 pub fn write_torch_ext_cuda(
@@ -94,32 +91,3 @@ fn write_cmake(
 
     Ok(())
 }
-
-pub fn render_preamble(
-    env: &Environment,
-    name: &str,
-    cuda_minver: Option<&Version>,
-    cuda_maxver: Option<&Version>,
-    torch_minver: Option<&Version>,
-    torch_maxver: Option<&Version>,
-    write: &mut impl Write,
-) -> Result<()> {
-    env.get_template("cuda/preamble.cmake")
-        .wrap_err("Cannot get CMake prelude template")?
-        .render_to_write(
-            context! {
-                name => name,
-                cuda_minver => cuda_minver.map(|v| v.to_string()),
-                cuda_maxver => cuda_maxver.map(|v| v.to_string()),
-                torch_minver => torch_minver.map(|v| v.to_string()),
-                torch_maxver => torch_maxver.map(|v| v.to_string()),
-                platform => env::consts::OS
-            },
-            &mut *write,
-        )
-        .wrap_err("Cannot render CMake prelude template")?;
-
-    write.write_all(b"\n")?;
-
-    Ok(())
-}
diff --git a/build2cmake/src/torch/metal.rs b/build2cmake/src/torch/metal.rs
index 6342dab4..75ef7ccb 100644
--- a/build2cmake/src/torch/metal.rs
+++ b/build2cmake/src/torch/metal.rs
@@ -1,17 +1,16 @@
-use std::{io::Write, path::PathBuf};
+use std::path::PathBuf;
 
-use eyre::{bail, Context, Result};
-use minijinja::{context, Environment};
+use eyre::{bail, Result};
+use minijinja::Environment;
 
 use crate::config::{Backend, Build, Torch};
 use crate::fileset::FileSet;
 use crate::torch::common::{
-    render_binding, render_extension, write_cmake_helpers, write_metadata, write_ops_py,
-    write_pyproject_toml, write_setup_py, write_torch_registration_macros,
+    render_binding, render_extension, render_preamble, write_cmake_helpers, write_metadata,
+    write_ops_py, write_pyproject_toml, write_setup_py, write_torch_registration_macros,
 };
 use crate::torch::kernel::render_kernel_components;
 use crate::torch::kernel_ops_identifier;
-use crate::version::Version;
 
 pub fn write_torch_ext_metal(
     env: &Environment,
@@ -71,6 +70,8 @@ fn write_cmake(
     render_preamble(
         env,
         name,
+        None,
+        None,
         torch.minver.as_ref(),
         torch.maxver.as_ref(),
         cmake_writer,
@@ -87,27 +88,3 @@ fn write_cmake(
 
     Ok(())
 }
-
-fn render_preamble(
-    env: &Environment,
-    name: &str,
-    torch_minver: Option<&Version>,
-    torch_maxver: Option<&Version>,
-    write: &mut impl Write,
-) -> Result<()> {
-    env.get_template("metal/preamble.cmake")
-        .wrap_err("Cannot get CMake prelude template")?
-        .render_to_write(
-            context! {
-                name => name,
-                torch_minver => torch_minver.map(|v| v.to_string()),
-                torch_maxver => torch_maxver.map(|v| v.to_string()),
-            },
-            &mut *write,
-        )
-        .wrap_err("Cannot render CMake prelude template")?;
-
-    write.write_all(b"\n")?;
-
-    Ok(())
-}
diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs
index 8bb6f5fe..4eecd852 100644
--- a/build2cmake/src/torch/xpu.rs
+++ b/build2cmake/src/torch/xpu.rs
@@ -1,18 +1,16 @@
-use std::io::Write;
 use std::path::PathBuf;
 
-use eyre::{bail, Context, Result};
-use minijinja::{context, Environment};
+use eyre::{bail, Result};
+use minijinja::Environment;
 
 use crate::config::{Backend, Build, Torch};
 use crate::torch::common::{
-    render_binding, render_extension, write_cmake_helpers, write_metadata, write_ops_py,
-    write_pyproject_toml, write_setup_py, write_torch_registration_macros,
+    render_binding, render_extension, render_preamble, write_cmake_helpers, write_metadata,
+    write_ops_py, write_pyproject_toml, write_setup_py, write_torch_registration_macros,
 };
 use crate::torch::deps::render_deps;
 use crate::torch::kernel::render_kernel_components;
 use crate::torch::kernel_ops_identifier;
-use crate::version::Version;
 use crate::FileSet;
 
 pub fn write_torch_ext_xpu(
@@ -73,6 +71,8 @@ fn write_cmake(
     render_preamble(
         env,
         name,
+        None,
+        None,
         torch.minver.as_ref(),
         torch.maxver.as_ref(),
         cmake_writer,
@@ -88,28 +88,3 @@ fn write_cmake(
 
     Ok(())
 }
-
-pub fn render_preamble(
-    env: &Environment,
-    name: &str,
-    torch_minver: Option<&Version>,
-    torch_maxver: Option<&Version>,
-    write: &mut impl Write,
-) -> Result<()> {
-    env.get_template("xpu/preamble.cmake")
-        .wrap_err("Cannot get CMake prelude template")?
-        .render_to_write(
-            context! {
-                name => name,
-                torch_minver => torch_minver.map(|v| v.to_string()),
-                torch_maxver => torch_maxver.map(|v| v.to_string()),
-                platform => std::env::consts::OS,
-            },
-            &mut *write,
-        )
-        .wrap_err("Cannot render CMake prelude template")?;
-
-    write.write_all(b"\n")?;
-
-    Ok(())
-}