peterchen-intel · Copilot · Jun 2, 2026 · Jun 2, 2026
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -564,15 +564,16 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
 
         const bool disable_moe_opt = GPU_DEBUG_VALUE_OR(config.get_disable_moe_opt(), false);
 
-        // Run the compressed MoE fusion chain on all devices, including non-systolic iGPU.
+        // Run compressed MoE conversion passes on all devices, including non-systolic iGPU.
         manager.register_pass<ov::pass::ConvertTiledMoeBlockToGatherMatmuls>();
 
         // This pass runs before ConvertPrecision, so f32 activations are still possible here.
+        // The MoE fusion passes registered below run only on Xe-LP and newer (arch >= xe_lp).
         manager.register_pass<ov::pass::ConvertGatherMatmulToGatherMatmulCompressed>(
             std::vector<ov::element::Type>{ov::element::f32, ov::element::f16},
             std::vector<ov::element::Type>{ov::element::u4, ov::element::i4,
                                            ov::element::i8, ov::element::u8});
-        if (!disable_moe_opt) {
+        if (!disable_moe_opt && device_info.arch >= cldnn::gpu_arch::xe_lp) {
             const bool has_batch_dim = !is_pa;
             manager.register_pass<ov::pass::MoeOpFusion>(has_batch_dim);
             manager.register_pass<ov::intel_gpu::FuseMOESharedExpert>();