9494 from optimum .intel .openvino .configuration import OVConfig
9595
9696
97+ _VLM_LANGUAGE_MODEL_TASKS = ("image-text-to-text" , "text-to-audio" , "automatic-speech-recognition" )
98+
99+
100+ def _is_vlm_language_model (task : str , model_name : str ) -> bool :
101+ return model_name == "language_model" and any (t in task for t in _VLM_LANGUAGE_MODEL_TASKS )
102+
103+
97104def _set_runtime_options (
98105 models_and_export_configs : Dict [
99106 str ,
@@ -109,13 +116,13 @@ def _set_runtime_options(
109116 sub_export_config .runtime_options = {}
110117 if (
111118 "text-generation" in task
112- or ( "image-text-to-text" in task and model_name == "language_model" )
119+ or _is_vlm_language_model ( task , model_name )
113120 or getattr (sub_export_config , "stateful" , False )
114121 ):
115122 sub_export_config .runtime_options ["ACTIVATIONS_SCALE_FACTOR" ] = "8.0"
116123 if not quantized_model and (
117124 "text-generation" in task
118- or ( "image-text-to-text" in task and model_name == "language_model" )
125+ or _is_vlm_language_model ( task , model_name )
119126 or getattr (sub_export_config , "stateful" , False )
120127 ):
121128 sub_export_config .runtime_options ["KV_CACHE_PRECISION" ] = "f16"
@@ -552,6 +559,35 @@ def export_models(
552559 return outputs
553560
554561
562+ def _save_auxiliary_weights (model , model_type : str , output_dir : Path ):
563+ if model_type != "qwen3_omni_moe" :
564+ return
565+ # CodePredictor is traced with inputs_embeds, so the per-step codec_embedding
566+ # tables aren't part of the IR and must be dumped separately for runtime lookup.
567+ import numpy as np
568+ import torch
569+
570+ talker = getattr (model , "talker" , None )
571+ if talker is None :
572+ return
573+ code_predictor = getattr (talker , "code_predictor" , None )
574+ if code_predictor is None :
575+ return
576+ cp_model = getattr (code_predictor , "model" , None )
577+ if cp_model is None :
578+ return
579+ codec_embedding = getattr (cp_model , "codec_embedding" , None )
580+ if codec_embedding is None or len (codec_embedding ) == 0 :
581+ return
582+ try :
583+ stacked = torch .stack ([emb .weight .data for emb in codec_embedding ])
584+ except (AttributeError , RuntimeError ) as e :
585+ logger .warning (f"Failed to extract codec_embedding weights for qwen3_omni_moe: { e } " )
586+ return
587+ np .save (output_dir / "code_predictor_codec_embedding.npy" , stacked .cpu ().float ().numpy ())
588+ logger .info (f"Saved CodePredictor codec_embedding weights ({ stacked .shape } ) to { output_dir } " )
589+
590+
555591def export_from_model (
556592 model : Union ["PreTrainedModel" , "ModelMixin" , "DiffusionPipeline" ],
557593 output : Union [str , Path ],
@@ -784,6 +820,8 @@ def export_from_model(
784820 library_name = library_name ,
785821 )
786822
823+ _save_auxiliary_weights (model , model_type , output )
824+
787825 return files_subpaths
788826
789827
@@ -811,7 +849,7 @@ def export_tokenizer(
811849
812850 if (
813851 task is not None
814- and (task .startswith ("text-generation" ) or task == "image-text-to-text" )
852+ and (task .startswith ("text-generation" ) or any ( t in task for t in _VLM_LANGUAGE_MODEL_TASKS ) )
815853 and compare_versions ("openvino-tokenizers" , ">=" , "2024.3.0.0" )
816854 ):
817855 logger .info (f"Set tokenizer padding side to left for `{ task } ` task." )
0 commit comments