1+ """
2+ export visual embedding of Qwen/Qwen2.5-VL-7B-Instruct
3+ ======================================================
4+
5+ requirements
6+ ++++++++++++
7+
8+ git+https://github.com/sdpython/experimental-experiment.git
9+ huggingface_hub>=1.2.1
10+ onnx-diagnostic>=0.8.4
11+ onnxruntime>=1.23
12+ torch>=2.9 # weekly is better
13+ transformers>=4.57
14+
15+ example
16+ +++++++
17+
18+ .. code-block:: bash
19+
20+ python export_qwen25_vl_visual.py -m Qwen/Qwen2.5-VL-7B-Instruct --device cpu --dtype float32 --exporter custom --pretrained --second-input
21+ """
22+
123import os
224import sys
25+ import time
326from argparse import ArgumentParser , BooleanOptionalAction
427
528
def remove_inplace_body_last_input_output_type_for_loop(filename: str):
    """
    Erases the declared type of the last input and the last output
    of every ``Loop`` body found in an ONNX file, in place.

    The model is read from *filename* without its external data, modified,
    then written back to the same *filename*.
    Only the nodes of the main graph are inspected, ``Loop`` nodes nested
    in subgraphs or local functions are left untouched.

    :param filename: path of the ONNX model to rewrite
    """
    import onnx

    proto = onnx.load(filename, load_external_data=False)
    for loop in (n for n in proto.graph.node if n.op_type == "Loop"):
        # The loop body is read from the first attribute of the node.
        body = loop.attribute[0].g
        # Input first, then output: an empty TypeProto overwrites the
        # type information attached to the last value of each list.
        for values in (body.input, body.output):
            values[-1].type.CopyFrom(onnx.TypeProto())
    onnx.save(proto, filename, save_as_external_data=False)
39+
40+
641def main (
742 model_id : str = "Qwen/Qwen2.5-VL-7B-Instruct" ,
843 device : str = "cpu" ,
@@ -107,6 +142,8 @@ def _config_reduction(config, task):
107142
108143 filename = f"qwen25_vli_visual.{ device } .{ dtype } .{ exporter } .onnx"
109144 print (f"-- export in { filename !r} " )
145+ stat_file = filename .replace (".onnx" , ".stats" )
146+ begin = time .perf_counter ()
110147
111148 export_inputs = inputs
112149 with torch_export_patches (
@@ -131,25 +168,43 @@ def _config_reduction(config, task):
131168 optimize = True ,
132169 onnx_plugs = PLUGS ,
133170 )
134-
135- print ("-- checking discrepancies" )
136- providers = ["CUDAExecutionProvider" , "CPUExecutionProvider" ]
137- if device == "cpu" :
138- providers = providers [1 :]
139- sess = onnxruntime .InferenceSession (filename , providers = providers )
140-
141- print (f"-- inputs { string_type (inputs , with_shape = True )} " )
142- feeds = {k : v .detach ().cpu ().numpy () for k , v in inputs .items ()}
143- small = sess .run (None , feeds )
144- diff = max_diff (expected , small [0 ], hist = [0.1 ])
145- print (f"-- discrepancies={ diff } " )
146-
147- if second_input :
148- print (f"-- inputs { string_type (big_inputs , with_shape = True )} " )
149- feeds = {k : v .detach ().cpu ().numpy () for k , v in big_inputs .items ()}
150- big = sess .run (None , feeds )
151- diff = max_diff (expected_big , big [0 ], hist = [0.1 ])
152- print (f"-- discrepancies={ diff } " )
171+ duration = time .perf_counter () - begin
172+
173+ if exporter == "onnx-dynamo" :
174+ # onnx-dynamo fails at producing function body with sequences as input / output.
175+ # They are replaced by tensor type one step in the model.
176+ print ("-- remove_body_last_input_output_for_loop" )
177+ remove_inplace_body_last_input_output_type_for_loop (filename )
178+ print ("-- done." )
179+
180+ with open (stat_file , "w" ) as f :
181+
182+ def fprint (s ):
183+ print (s )
184+ f .write (f"{ s } \n " )
185+
186+ fprint (f"-- export duration: { duration } " )
187+ fprint ("-- checking discrepancies" )
188+ providers = ["CUDAExecutionProvider" , "CPUExecutionProvider" ]
189+ if device == "cpu" :
190+ providers = providers [1 :]
191+ sess = onnxruntime .InferenceSession (filename , providers = providers )
192+
193+ fprint (f"-- inputs { string_type (inputs , with_shape = True )} " )
194+ fprint (f"-- expected { string_type (expected , with_shape = True )} " )
195+ feeds = {k : v .detach ().cpu ().numpy () for k , v in inputs .items ()}
196+ small = sess .run (None , feeds )
197+ diff = max_diff (expected , small [0 ], hist = [0.1 ])
198+ fprint (f"-- discrepancies={ diff } " )
199+
200+ if second_input :
201+ fprint ("" )
202+ fprint (f"-- inputs { string_type (big_inputs , with_shape = True )} " )
203+ fprint (f"-- expected { string_type (expected_big , with_shape = True )} " )
204+ feeds = {k : v .detach ().cpu ().numpy () for k , v in big_inputs .items ()}
205+ big = sess .run (None , feeds )
206+ diff = max_diff (expected_big , big [0 ], hist = [0.1 ])
207+ fprint (f"-- discrepancies={ diff } " )
153208
154209
155210def get_parser () -> ArgumentParser :
0 commit comments