From 630c99914bfc26c163dea967d5007fc20de50f71 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 05:37:53 +0000
Subject: [PATCH 1/2] Guard MoE fusion passes by XeLP arch

---
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 800dc81311bd..cf8ab9fe9ba7 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -572,7 +572,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             std::vector<ov::element::Type>{ov::element::f32, ov::element::f16},
             std::vector<ov::element::Type>{ov::element::u4, ov::element::i4,
                                            ov::element::i8, ov::element::u8});
-        if (!disable_moe_opt) {
+        if (!disable_moe_opt && device_info.arch >= cldnn::gpu_arch::xe_lp) {
             const bool has_batch_dim = !is_pa;
             manager.register_pass<ov::pass::MoeOpFusion>(has_batch_dim);
             manager.register_pass<ov::intel_gpu::FuseMOESharedExpert>();

From 9dcc9b04b8e184f268d844ec3d53424a06e1b648 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 2 Jun 2026 07:42:40 +0000
Subject: [PATCH 2/2] Clarify comments for XeLP-gated MoE fusion

---
 src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index cf8ab9fe9ba7..fcebc0461cb6 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -564,10 +564,11 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
 
         const bool disable_moe_opt = GPU_DEBUG_VALUE_OR(config.get_disable_moe_opt(), false);
 
-        // Run the compressed MoE fusion chain on all devices, including non-systolic iGPU.
+        // Run compressed MoE conversion passes on all devices, including non-systolic iGPU.
         manager.register_pass<ov::pass::ConvertTiledMoeBlockToGatherMatmuls>();
 
         // This pass runs before ConvertPrecision, so f32 activations are still possible here.
+        // The MoE fusion passes registered below (e.g. MoeOpFusion, FuseMOESharedExpert) are gated on arch >= xe_lp (XeLP and newer).
         manager.register_pass<ov::pass::ConvertGatherMatmulToGatherMatmulCompressed>(
             std::vector<ov::element::Type>{ov::element::f32, ov::element::f16},
             std::vector<ov::element::Type>{ov::element::u4, ov::element::i4,