From 630c99914bfc26c163dea967d5007fc20de50f71 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 05:37:53 +0000
Subject: [PATCH 1/2] Guard MoE fusion passes by XeLP arch

---
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 800dc81311bd..cf8ab9fe9ba7 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -572,7 +572,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
             std::vector{ov::element::f32, ov::element::f16},
             std::vector{ov::element::u4, ov::element::i4, ov::element::i8, ov::element::u8});
 
-        if (!disable_moe_opt) {
+        if (!disable_moe_opt && device_info.arch >= cldnn::gpu_arch::xe_lp) {
             const bool has_batch_dim = !is_pa;
             manager.register_pass(has_batch_dim);
             manager.register_pass();

From 9dcc9b04b8e184f268d844ec3d53424a06e1b648 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 07:42:40 +0000
Subject: [PATCH 2/2] Clarify comments for XeLP-gated MoE fusion

---
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index cf8ab9fe9ba7..fcebc0461cb6 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -564,10 +564,11 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
 
         const bool disable_moe_opt = GPU_DEBUG_VALUE_OR(config.get_disable_moe_opt(), false);
 
-        // Run the compressed MoE fusion chain on all devices, including non-systolic iGPU.
+        // Run compressed MoE conversion passes on all devices, including non-systolic iGPU.
         manager.register_pass();
 
         // This pass runs before ConvertPrecision, so f32 activations are still possible here.
+        // Later MoE fusion passes are applied only on XeLP+.
         manager.register_pass(
             std::vector{ov::element::f32, ov::element::f16},
             std::vector{ov::element::u4, ov::element::i4,