Skip to content

Commit f2fc0ef

Browse files
committed
Qwen 3 Omni MoE support
1 parent 190d59f commit f2fc0ef

19 files changed

Lines changed: 3538 additions & 122 deletions

optimum/exporters/openvino/__main__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,8 @@ def _main_quantize(
651651
# which is returned as the inferred task. As a result, we try to load the exported model using the
652652
# OVModelForSpeechSeq2Seq class instead of the OVModelForVisualCausalLM class when the task is not specified
653653
# explicitly. Because of this, we get an error.
654+
# Qwen3-Omni-MoE registers multiple tasks (text-to-audio, ASR, image-text-to-text) but always loads
655+
# through OVModelForVisualCausalLM, so redirect unconditionally regardless of inferred or explicit task.
654656
if original_task == "auto" and library_name == "transformers":
655657
config = AutoConfig.from_pretrained(
656658
model_name_or_path,
@@ -661,7 +663,7 @@ def _main_quantize(
661663
trust_remote_code=trust_remote_code,
662664
)
663665
model_type = config.model_type
664-
if model_type in ["phi4mm", "phi4_multimodal"]:
666+
if model_type in ["phi4mm", "phi4_multimodal", "qwen3_omni_moe"]:
665667
task = "image-text-to-text"
666668

667669
# Step 1. Obtain the correct OpenVINO model class

optimum/exporters/openvino/convert.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@
9494
from optimum.intel.openvino.configuration import OVConfig
9595

9696

97+
_VLM_LANGUAGE_MODEL_TASKS = ("image-text-to-text", "text-to-audio", "automatic-speech-recognition")
98+
99+
100+
def _is_vlm_language_model(task: str, model_name: str) -> bool:
101+
return model_name == "language_model" and any(t in task for t in _VLM_LANGUAGE_MODEL_TASKS)
102+
103+
97104
def _set_runtime_options(
98105
models_and_export_configs: Dict[
99106
str,
@@ -109,13 +116,13 @@ def _set_runtime_options(
109116
sub_export_config.runtime_options = {}
110117
if (
111118
"text-generation" in task
112-
or ("image-text-to-text" in task and model_name == "language_model")
119+
or _is_vlm_language_model(task, model_name)
113120
or getattr(sub_export_config, "stateful", False)
114121
):
115122
sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
116123
if not quantized_model and (
117124
"text-generation" in task
118-
or ("image-text-to-text" in task and model_name == "language_model")
125+
or _is_vlm_language_model(task, model_name)
119126
or getattr(sub_export_config, "stateful", False)
120127
):
121128
sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
@@ -552,6 +559,35 @@ def export_models(
552559
return outputs
553560

554561

562+
def _save_auxiliary_weights(model, model_type: str, output_dir: Path):
563+
if model_type != "qwen3_omni_moe":
564+
return
565+
# CodePredictor is traced with inputs_embeds, so the per-step codec_embedding
566+
# tables aren't part of the IR and must be dumped separately for runtime lookup.
567+
import numpy as np
568+
import torch
569+
570+
talker = getattr(model, "talker", None)
571+
if talker is None:
572+
return
573+
code_predictor = getattr(talker, "code_predictor", None)
574+
if code_predictor is None:
575+
return
576+
cp_model = getattr(code_predictor, "model", None)
577+
if cp_model is None:
578+
return
579+
codec_embedding = getattr(cp_model, "codec_embedding", None)
580+
if codec_embedding is None or len(codec_embedding) == 0:
581+
return
582+
try:
583+
stacked = torch.stack([emb.weight.data for emb in codec_embedding])
584+
except (AttributeError, RuntimeError) as e:
585+
logger.warning(f"Failed to extract codec_embedding weights for qwen3_omni_moe: {e}")
586+
return
587+
np.save(output_dir / "code_predictor_codec_embedding.npy", stacked.cpu().float().numpy())
588+
logger.info(f"Saved CodePredictor codec_embedding weights ({stacked.shape}) to {output_dir}")
589+
590+
555591
def export_from_model(
556592
model: Union["PreTrainedModel", "ModelMixin", "DiffusionPipeline"],
557593
output: Union[str, Path],
@@ -784,6 +820,8 @@ def export_from_model(
784820
library_name=library_name,
785821
)
786822

823+
_save_auxiliary_weights(model, model_type, output)
824+
787825
return files_subpaths
788826

789827

@@ -811,7 +849,7 @@ def export_tokenizer(
811849

812850
if (
813851
task is not None
814-
and (task.startswith("text-generation") or task == "image-text-to-text")
852+
and (task.startswith("text-generation") or any(t in task for t in _VLM_LANGUAGE_MODEL_TASKS))
815853
and compare_versions("openvino-tokenizers", ">=", "2024.3.0.0")
816854
):
817855
logger.info(f"Set tokenizer padding side to left for `{task}` task.")

0 commit comments

Comments (0)