Skip to content

Commit 6692cd0

Browse files
committed
last changes to export qwen model
1 parent 10f9e16 commit 6692cd0

8 files changed

Lines changed: 5551 additions & 19 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
*.sqlitest
2222
*.svg
2323
*.onnx.stats
24+
*.stats
2425
CodeLlama*
2526
_tools/benchenv**
2627
_tools/repos**
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
clear&&python -m onnx_diagnostic sbs \
3+
-i qwen25_vli_visual.inputs.pt \
4+
-e test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.graph.ep.pt2 \
5+
-m test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.onnx \
6+
-o test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.xlsx \
7+
-v 1 --atol 0.1 --rtol 1000

_dump_test_26/test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.graph.ep.graph

Lines changed: 2622 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
clear&&python -m onnx_diagnostic sbs \
3+
-i qwen25_vli_visual.inputs.pt \
4+
-e test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.graph.ep.pt2 \
5+
-m test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.onnx \
6+
-o test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.xlsx \
7+
-v 1 --atol 0.1 --rtol 1000

_keep_dump_test/test_qwen25_vli_visual.cpu.float32.LOOPMHA.custom.graph.ep.graph

Lines changed: 2552 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,43 @@
1+
"""
2+
export visual embedding of Qwen/Qwen2.5-VL-7B-Instruct
3+
======================================================
4+
5+
requirements
6+
++++++++++++
7+
8+
git+https://github.com/sdpython/experimental-experiment.git
9+
huggingface_hub>=1.2.1
10+
onnx-diagnostic>=0.8.4
11+
onnxruntime>=1.23
12+
torch>=2.9 # weekly is better
13+
transformers>=4.57
14+
15+
example
16+
+++++++
17+
18+
.. code-block:: bash
19+
20+
python export_qwen25_vl_visual.py -m Qwen/Qwen2.5-VL-7B-Instruct --device cpu --dtype float32 --exporter custom --pretrained --second-input
21+
"""
22+
123
import os
224
import sys
25+
import time
326
from argparse import ArgumentParser, BooleanOptionalAction
427

528

29+
def remove_inplace_body_last_input_output_type_for_loop(filename: str):
    """Erase the declared type of the last input/output of every Loop body.

    The model stored in *filename* is loaded without its external data,
    modified in memory and written back to the same path, so the file is
    overwritten in place. Only the nodes of the main graph are visited;
    nested subgraphs and local functions are left untouched.

    For each ``Loop`` node, the type of the last input and of the last
    output of its ``body`` subgraph is replaced by an empty
    ``onnx.TypeProto``.

    :param filename: path of the ONNX model to patch in place
    :raises ValueError: if a ``Loop`` node has no ``body`` attribute
    """
    import onnx

    # External data is not loaded: only the graph structure is edited.
    model = onnx.load(filename, load_external_data=False)
    for node in model.graph.node:
        if node.op_type != "Loop":
            continue
        # Pick the subgraph by attribute name instead of relying on the
        # attribute being stored first in the node.
        body = next((att.g for att in node.attribute if att.name == "body"), None)
        if body is None:
            raise ValueError(
                f"Loop node {node.name!r} in {filename!r} has no 'body' attribute"
            )
        # CopyFrom with an empty message resets the type while keeping the
        # field present, exactly as the positional version did.
        body.input[-1].type.CopyFrom(onnx.TypeProto())
        body.output[-1].type.CopyFrom(onnx.TypeProto())
    onnx.save(model, filename, save_as_external_data=False)
39+
40+
641
def main(
742
model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct",
843
device: str = "cpu",
@@ -107,6 +142,8 @@ def _config_reduction(config, task):
107142

108143
filename = f"qwen25_vli_visual.{device}.{dtype}.{exporter}.onnx"
109144
print(f"-- export in {filename!r}")
145+
stat_file = filename.replace(".onnx", ".stats")
146+
begin = time.perf_counter()
110147

111148
export_inputs = inputs
112149
with torch_export_patches(
@@ -131,25 +168,43 @@ def _config_reduction(config, task):
131168
optimize=True,
132169
onnx_plugs=PLUGS,
133170
)
134-
135-
print("-- checking discrepancies")
136-
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
137-
if device == "cpu":
138-
providers = providers[1:]
139-
sess = onnxruntime.InferenceSession(filename, providers=providers)
140-
141-
print(f"-- inputs {string_type(inputs, with_shape=True)}")
142-
feeds = {k: v.detach().cpu().numpy() for k, v in inputs.items()}
143-
small = sess.run(None, feeds)
144-
diff = max_diff(expected, small[0], hist=[0.1])
145-
print(f"-- discrepancies={diff}")
146-
147-
if second_input:
148-
print(f"-- inputs {string_type(big_inputs, with_shape=True)}")
149-
feeds = {k: v.detach().cpu().numpy() for k, v in big_inputs.items()}
150-
big = sess.run(None, feeds)
151-
diff = max_diff(expected_big, big[0], hist=[0.1])
152-
print(f"-- discrepancies={diff}")
171+
duration = time.perf_counter() - begin
172+
173+
if exporter == "onnx-dynamo":
174+
# onnx-dynamo fails at producing function body with sequences as input / output.
175+
# They are replaced by tensor type one step in the model.
176+
print("-- remove_body_last_input_output_for_loop")
177+
remove_inplace_body_last_input_output_type_for_loop(filename)
178+
print("-- done.")
179+
180+
with open(stat_file, "w") as f:
181+
182+
def fprint(s):
183+
print(s)
184+
f.write(f"{s}\n")
185+
186+
fprint(f"-- export duration: {duration}")
187+
fprint("-- checking discrepancies")
188+
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
189+
if device == "cpu":
190+
providers = providers[1:]
191+
sess = onnxruntime.InferenceSession(filename, providers=providers)
192+
193+
fprint(f"-- inputs {string_type(inputs, with_shape=True)}")
194+
fprint(f"-- expected {string_type(expected, with_shape=True)}")
195+
feeds = {k: v.detach().cpu().numpy() for k, v in inputs.items()}
196+
small = sess.run(None, feeds)
197+
diff = max_diff(expected, small[0], hist=[0.1])
198+
fprint(f"-- discrepancies={diff}")
199+
200+
if second_input:
201+
fprint("")
202+
fprint(f"-- inputs {string_type(big_inputs, with_shape=True)}")
203+
fprint(f"-- expected {string_type(expected_big, with_shape=True)}")
204+
feeds = {k: v.detach().cpu().numpy() for k, v in big_inputs.items()}
205+
big = sess.run(None, feeds)
206+
diff = max_diff(expected_big, big[0], hist=[0.1])
207+
fprint(f"-- discrepancies={diff}")
153208

154209

155210
def get_parser() -> ArgumentParser:

0 commit comments

Comments
 (0)