diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 800dc81311bda6..fcebc0461cb623 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -564,15 +564,16 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const bool disable_moe_opt = GPU_DEBUG_VALUE_OR(config.get_disable_moe_opt(), false); - // Run the compressed MoE fusion chain on all devices, including non-systolic iGPU. + // Run compressed MoE conversion passes on all devices, including non-systolic iGPU. manager.register_pass(); // This pass runs before ConvertPrecision, so f32 activations are still possible here. + // Later MoE fusion passes are applied only on XeLP+. manager.register_pass( std::vector{ov::element::f32, ov::element::f16}, std::vector{ov::element::u4, ov::element::i4, ov::element::i8, ov::element::u8}); - if (!disable_moe_opt) { + if (!disable_moe_opt && device_info.arch >= cldnn::gpu_arch::xe_lp) { const bool has_batch_dim = !is_pa; manager.register_pass(has_batch_dim); manager.register_pass();