diff --git a/CMakePresets.json b/CMakePresets.json index ca4da226ba1..4d8b70f08b2 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -217,6 +217,21 @@ "rhs": "Darwin" } }, + { + "name": "llm-debug-vulkan", + "displayName": "LLM debug build with Vulkan", + "inherits": [ + "llm-debug" + ], + "cacheVariables": { + "EXECUTORCH_BUILD_VULKAN": "ON" + }, + "condition": { + "type": "inList", + "string": "${hostSystemName}", + "list": ["Linux", "Windows"] + } + }, { "name": "llm-metal-stats", "displayName": "LLM Metal build with stats collection and logging", @@ -354,6 +369,15 @@ ], "jobs": 0 }, + { + "name": "llm-debug-vulkan-install", + "displayName": "Build and install LLM extension debug artifacts (Vulkan)", + "configurePreset": "llm-debug-vulkan", + "targets": [ + "install" + ], + "jobs": 0 + }, { "name": "llm-metal-stats-install", "displayName": "Build and install LLM extension artifacts with Metal stats", @@ -449,6 +473,20 @@ } ] }, + { + "name": "llm-debug-vulkan", + "displayName": "Configure, build and install ExecuTorch LLM extension with Vulkan enabled (Debug)", + "steps": [ + { + "type": "configure", + "name": "llm-debug-vulkan" + }, + { + "type": "build", + "name": "llm-debug-vulkan-install" + } + ] + }, { "name": "llm-metal-stats", "displayName": "Configure, build and install ExecuTorch LLM extension with Metal stats and logging", diff --git a/Makefile b/Makefile index 6b8ea37e7b2..9dcfa26027e 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ # # ============================================================================== -.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help +.PHONY: voxtral-cuda voxtral-cpu 
voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help help: @echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:" @@ -109,6 +109,7 @@ help: @echo " parakeet-cuda-debug - Build Parakeet runner with CUDA backend (debug mode)" @echo " parakeet-cpu - Build Parakeet runner with CPU backend" @echo " parakeet-metal - Build Parakeet runner with Metal backend (macOS only)" + @echo " parakeet-vulkan - Build Parakeet runner with Vulkan backend" @echo " dinov2-cuda - Build DINOv2 runner with CUDA backend" @echo " dinov2-cuda-debug - Build DINOv2 runner with CUDA backend (debug mode)" @echo " sortformer-cuda - Build Sortformer runner with CUDA backend" @@ -221,6 +222,15 @@ parakeet-metal: @echo "✓ Build complete!" @echo " Binary: cmake-out/examples/models/parakeet/parakeet_runner" +parakeet-vulkan: + @echo "==> Building and installing ExecuTorch with Vulkan..." + cmake --workflow --preset llm-debug-vulkan + @echo "==> Building Parakeet runner with Vulkan..." + cd examples/models/parakeet && cmake --workflow --preset parakeet-vulkan + @echo "" + @echo "✓ Build complete!" + @echo " Binary: cmake-out/examples/models/parakeet/parakeet_runner" + dinov2-cuda: @echo "==> Building and installing ExecuTorch with CUDA..." 
cmake --workflow --preset llm-release-cuda diff --git a/examples/models/parakeet/CMakeLists.txt b/examples/models/parakeet/CMakeLists.txt index ec52a596af2..9354afe5f86 100644 --- a/examples/models/parakeet/CMakeLists.txt +++ b/examples/models/parakeet/CMakeLists.txt @@ -91,6 +91,11 @@ if(EXECUTORCH_BUILD_METAL) executorch_target_link_options_shared_lib(metal_backend) endif() +if(EXECUTORCH_BUILD_VULKAN) + list(APPEND link_libraries vulkan_backend) + executorch_target_link_options_shared_lib(vulkan_backend) +endif() + add_executable(parakeet_runner main.cpp timestamp_utils.cpp tokenizer_utils.cpp) if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") target_link_options_gc_sections(parakeet_runner) diff --git a/examples/models/parakeet/CMakePresets.json b/examples/models/parakeet/CMakePresets.json index ccb4f4fcdd2..afcfd99491c 100644 --- a/examples/models/parakeet/CMakePresets.json +++ b/examples/models/parakeet/CMakePresets.json @@ -55,6 +55,19 @@ "type": "equals", "rhs": "Darwin" } + }, + { + "name": "parakeet-vulkan", + "displayName": "Parakeet runner (Vulkan)", + "inherits": ["parakeet-base"], + "cacheVariables": { + "EXECUTORCH_BUILD_VULKAN": "ON" + }, + "condition": { + "type": "inList", + "string": "${hostSystemName}", + "list": ["Linux", "Windows"] + } } ], "buildPresets": [ @@ -85,6 +98,13 @@ "configurePreset": "parakeet-metal", "configuration": "Release", "targets": ["parakeet_runner"] + }, + { + "name": "parakeet-vulkan", + "displayName": "Build Parakeet runner (Vulkan)", + "configurePreset": "parakeet-vulkan", + "configuration": "Release", + "targets": ["parakeet_runner"] } ], "workflowPresets": [ @@ -143,6 +163,20 @@ "name": "parakeet-metal" } ] + }, + { + "name": "parakeet-vulkan", + "displayName": "Configure and build Parakeet runner (Vulkan)", + "steps": [ + { + "type": "configure", + "name": "parakeet-vulkan" + }, + { + "type": "build", + "name": "parakeet-vulkan" + } + ] } ] } diff --git a/examples/models/parakeet/export_parakeet_tdt.py 
def _create_vulkan_partitioners(programs, vulkan_force_fp16=False):
    """Build the per-program partitioner mapping for a Vulkan lowering.

    Args:
        programs: Mapping of program name to exported program. Only its
            keys are read here; the mapping itself is returned untouched.
        vulkan_force_fp16: Value passed to each ``VulkanPartitioner`` under
            the ``"force_fp16"`` option key.

    Returns:
        A ``(partitioner, programs)`` tuple. ``partitioner`` maps every
        program name to a list of partitioners: an empty list for the
        ``"preprocessor"`` program, and a single freshly constructed
        ``VulkanPartitioner`` for every other program.
    """
    # Function-scope import: the Vulkan partitioner module is only loaded
    # when this helper is actually invoked.
    from executorch.backends.vulkan.partitioner.vulkan_partitioner import (
        VulkanPartitioner,
    )

    print("\nLowering to ExecuTorch with Vulkan...")

    # Each non-preprocessor entry gets its own partitioner instance and its
    # own options dict, so nothing is shared between programs.
    partitioner = {
        name: (
            []
            if name == "preprocessor"
            else [VulkanPartitioner({"force_fp16": vulkan_force_fp16})]
        )
        for name in programs
    }
    return partitioner, programs
help="Group size for embedding quantization (default: 0 = per-axis)", ) + parser.add_argument("--vulkan_force_fp16", action="store_true") + args = parser.parse_args() # Validate dtype @@ -719,7 +742,12 @@ def main(): qembedding_group_size=args.qembedding_group_size, ) - et = lower_to_executorch(programs, metadata=metadata, backend=args.backend) + et = lower_to_executorch( + programs, + metadata=metadata, + backend=args.backend, + vulkan_force_fp16=args.vulkan_force_fp16, + ) pte_path = os.path.join(args.output_dir, "model.pte") print(f"\nSaving ExecuTorch program to: {pte_path}") diff --git a/examples/models/parakeet/main.cpp b/examples/models/parakeet/main.cpp index 2941484bea6..87768cec38b 100644 --- a/examples/models/parakeet/main.cpp +++ b/examples/models/parakeet/main.cpp @@ -489,7 +489,7 @@ int main(int argc, char** argv) { static_cast(pred_hidden), static_cast(sample_rate), window_stride, - encoder_subsampling_factor); + static_cast(encoder_subsampling_factor)); ET_LOG(Info, "Running TDT greedy decode..."); auto decoded_tokens = greedy_decode_executorch(