"""Export the visual part of a Qwen 2.5 VL model to ONNX and check discrepancies."""

import os
import sys
from argparse import ArgumentParser, BooleanOptionalAction

# Names of the dtypes accepted by :func:`main`; shared with the command line
# parser so that both stay in sync.
_SUPPORTED_DTYPES = ("float16", "bfloat16", "float32")


def main(
    model_id: str = "Qwen/Qwen2.5-VL-7B-Instruct",
    device: str = "cpu",
    dtype: str = "float32",
    exporter: str = "onnx-dynamo",
    pretrained: bool = True,
    second_input: bool = True,
):
    """
    Exports the ``visual`` submodule of a model to ONNX and compares the
    outputs of onnxruntime with the outputs of the torch module.

    :param model_id: model id given to ``AutoModel`` / ``AutoProcessor``
    :param device: device the model and the inputs are moved to,
        ``"cpu"`` restricts onnxruntime to ``CPUExecutionProvider``
    :param dtype: one of ``"float16"``, ``"bfloat16"``, ``"float32"``,
        any other value raises ``KeyError``
    :param exporter: exporter name forwarded to ``to_onnx``
    :param pretrained: loads the pretrained weights if True, otherwise
        builds a reduced random model with ``get_untrained_model_with_inputs``
    :param second_input: also checks discrepancies on a second, bigger input

    If the environment variable ``STOPAT`` is set to a non empty value,
    the expected outputs are only computed inside the patching context.
    """
    # Heavy imports are kept local so that importing this module
    # (to build the parser for example) stays cheap.
    print("-- import torch")
    import torch

    print("-- import onnxruntime")
    import onnxruntime

    print("-- import transformers")
    from transformers import AutoModel, AutoProcessor

    print("-- import onnx_diagnostic")
    from onnx_diagnostic.helpers import string_type, max_diff
    from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
        PLUGS,
    )
    from onnx_diagnostic.torch_export_patches import torch_export_patches
    from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
    from onnx_diagnostic.export.api import to_onnx

    print(f"-- creating model {model_id!r}")
    print(
        f"-- device={device!r}, dtype={dtype!r}, exporter={exporter!r}, "
        f"pretrained={pretrained!r}"
    )
    # Single place where the dtype name is resolved (KeyError if unsupported).
    torch_dtype = {name: getattr(torch, name) for name in _SUPPORTED_DTYPES}[dtype]

    if pretrained:
        print("-- pretrained model")
        model = AutoModel.from_pretrained(
            model_id, device_map=device, dtype=torch_dtype, attn_implementation="sdpa"
        ).eval()
    else:
        print("-- random model")

        def _config_reduction(config, task):
            # NOTE(review): "dtype" is hard-coded here whatever the requested
            # dtype is; the model is cast to the requested dtype below.
            return {
                # "num_hidden_layers": 2,
                "text_config": {
                    "num_hidden_layers": 2,
                    "layer_types": ["full_attention", "full_attention"],
                },
                # "_attn_implementation": "flash_attention_2",
                "_attn_implementation": "sdpa",
                "dtype": "float16",
            }

        data = get_untrained_model_with_inputs(
            model_id, verbose=1, add_second_input=False, config_reduction=_config_reduction
        )
        model = data["model"]

    model = model.to(device).to(torch_dtype)

    print(f"-- config._attn_implementation={model.config._attn_implementation}")
    print(f"-- model.dtype={model.dtype}")
    print(f"-- model.device={model.device}")
    processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
    print(f"-- processor={type(processor)}")

    inputs = dict(
        hidden_states=torch.rand((1292, 1176), dtype=torch_dtype).to(device),
        grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
    )
    big_inputs = (
        dict(
            hidden_states=torch.rand((14308, 1176), dtype=torch_dtype).to(device),
            grid_thw=torch.tensor([[1, 98, 146]], dtype=torch.int64).to(device),
        )
        if second_input
        else None
    )

    model_to_export = model.visual if hasattr(model, "visual") else model.model.visual

    def _reference(kwargs):
        # Reference outputs are only compared, never differentiated:
        # no need to record an autograd graph.
        if kwargs is None:
            return None
        with torch.no_grad():
            return model_to_export(**kwargs)

    if not os.environ.get("STOPAT", ""):
        print(f"-- compute with inputs: {string_type(inputs, with_shape=True)}")
        expected = _reference(inputs)
        print(f"-- got: {string_type(expected, with_shape=True)}")
        print(f"-- compute with inputs: {string_type(big_inputs, with_shape=True)}")
        expected_big = _reference(big_inputs)
        print(f"-- got: {string_type(expected_big, with_shape=True)}")
    else:
        expected = None
        expected_big = None
        print(f"-- expected: {string_type(expected, with_shape=True)}")

    dynamic_shapes = dict(
        hidden_states={0: "hidden_width", 1: "hidden_height"},
        grid_thw={},  # {0: "n_images"}, # TODO: fix
    )

    filename = f"qwen25_vli_visual.{device}.{dtype}.{exporter}.onnx"
    print(f"-- export in {filename!r}")

    export_inputs = inputs
    with torch_export_patches(
        patch_torch=False,
        patch_sympy=False,
        patch_transformers=True,
        verbose=1,
        stop_if_static=2,
    ):
        if expected is None:
            # STOPAT case: the expected outputs were not computed before.
            expected = _reference(inputs)
            expected_big = _reference(big_inputs)
        to_onnx(
            model_to_export,
            kwargs=export_inputs,
            dynamic_shapes=dynamic_shapes,
            filename=filename,
            exporter=exporter,
            verbose=1,
            save_ep=None,
            target_opset=22,
            optimize=True,
            onnx_plugs=PLUGS,
        )

    print("-- checking discrepancies")
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    if device == "cpu":
        providers = providers[1:]
    sess = onnxruntime.InferenceSession(filename, providers=providers)

    def _report_discrepancies(torch_feeds, reference):
        # Runs the exported model on the same inputs and prints the differences
        # between the first onnxruntime output and the torch outputs.
        # NOTE(review): Tensor.numpy() is not expected to support bfloat16,
        # this check probably fails with dtype="bfloat16" -- to confirm.
        print(f"-- inputs {string_type(torch_feeds, with_shape=True)}")
        feeds = {k: v.detach().cpu().numpy() for k, v in torch_feeds.items()}
        got = sess.run(None, feeds)
        diff = max_diff(reference, got[0], hist=[0.1])
        print(f"-- discrepancies={diff}")

    _report_discrepancies(inputs, expected)
    if second_input:
        _report_discrepancies(big_inputs, expected_big)


def get_parser() -> ArgumentParser:
    """
    Builds the command line parser of the script.

    Options: ``-m/--mid``, ``-d/--device``, ``-t/--dtype``, ``-e/--exporter``,
    ``--pretrained/--no-pretrained``, ``--second-input/--no-second-input``.
    ``--dtype`` is restricted to the dtypes :func:`main` supports.
    """
    parser = ArgumentParser(
        prog="qwen25", description="""Export visual part of model Qwen 2.5 VL."""
    )
    parser.add_argument(
        "-m",
        "--mid",
        type=str,
        default="Qwen/Qwen2.5-VL-7B-Instruct",
        help="model id, default is Qwen/Qwen2.5-VL-7B-Instruct",
    )
    parser.add_argument("-d", "--device", default="cpu", help="Device, cpu (default) or cuda.")
    parser.add_argument(
        "-t",
        "--dtype",
        default="float32",
        # Rejecting unknown names here gives a usage error instead of a
        # KeyError raised after the heavy imports made by main.
        choices=_SUPPORTED_DTYPES,
        help="dtype, float32 (default), float16 or bfloat16",
    )
    parser.add_argument(
        "-e", "--exporter", default="onnx-dynamo", help="exporter, default is onnx-dynamo"
    )
    parser.add_argument(
        "--pretrained",
        default=True,
        help="use pretrained model or a random model",
        action=BooleanOptionalAction,
    )
    parser.add_argument(
        "--second-input",
        default=True,
        help="check discrepancies with other inputs",
        action=BooleanOptionalAction,
    )
    return parser


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args(sys.argv[1:])
    main(
        model_id=args.mid,
        device=args.device,
        dtype=args.dtype,
        exporter=args.exporter,
        pretrained=args.pretrained,
        second_input=args.second_input,
    )
+ dtype=args.dtype, + exporter=args.exporter, + pretrained=args.pretrained, + second_input=args.second_input, + ) diff --git a/_unittests/ut_helpers/test_cache_helper.py b/_unittests/ut_helpers/test_cache_helper.py index 5f9e0c92..db6e2781 100644 --- a/_unittests/ut_helpers/test_cache_helper.py +++ b/_unittests/ut_helpers/test_cache_helper.py @@ -121,6 +121,7 @@ def test_unflatten_flatten_encoder_decoder_cache(self): ) self.assertEqual(0, max_diff(c2, c2)["abs"]) self.assertIsInstance(c2, transformers.cache_utils.EncoderDecoderCache) + self.assertEqual(max_diff(c2, c2)["abs"], 0) flat, _spec = torch.utils._pytree.tree_flatten(c2) self.assertIsInstance(flat, list) self.assertEqual(len(flat), 12) diff --git a/_unittests/ut_helpers/test_onnx_helper.py b/_unittests/ut_helpers/test_onnx_helper.py index 9b2cebea..b925e24c 100644 --- a/_unittests/ut_helpers/test_onnx_helper.py +++ b/_unittests/ut_helpers/test_onnx_helper.py @@ -30,6 +30,7 @@ extract_subset_of_nodes, make_submodel, select_model_inputs_outputs, + _enumerate_model_node_outputs, ) @@ -602,6 +603,12 @@ def _get_model_select(self): ) return onnx_model + def test__enumerate_model_node_outputs(self): + model = self._get_model_select() + outputs1 = list(_enumerate_model_node_outputs(model, order=False)) + outputs2 = list(_enumerate_model_node_outputs(model, order=True)) + self.assertEqual(set(outputs1), set(outputs2)) + def test_select_model_inputs_outputs(self): def enumerate_model_tensors(model): for tensor in _get_all_tensors(model): diff --git a/_unittests/ut_torch_models/test_validate_models.py b/_unittests/ut_torch_models/test_validate_models.py index 7f0138ee..ad6dae7e 100644 --- a/_unittests/ut_torch_models/test_validate_models.py +++ b/_unittests/ut_torch_models/test_validate_models.py @@ -8,17 +8,11 @@ requires_experimental, requires_transformers, requires_cuda, - has_torch, - has_transformers, ) from onnx_diagnostic.torch_models.validate import validate_model -torch29_and_tr_main = not 
has_torch("2.9.9") and has_transformers("4.99999") - - class TestValidateModel(ExtTestCase): - @unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_transformers("4.53") @requires_torch("2.7.99") @requires_experimental() @@ -40,12 +34,12 @@ def test_validate_tiny_llms_bfloat16(self): dtype="bfloat16", device="cuda", runtime="orteval", + optimization="default+onnxruntime+os_ort", ) self.assertLess(summary["disc_onnx_ort_run_abs"], 2e-2) self.assertIn("onnx_filename", data) self.clean_dump() - @unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_transformers("4.57") # 4.53 works for some jobs fails due to no space left @requires_torch("2.9.99") # 2.9 works for some jobs fails due to no space left @requires_experimental() @@ -68,7 +62,6 @@ def test_validate_microsoft_phi4_reasoning(self): self.assertIn("onnx_filename", data) self.clean_dump() - @unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_transformers("4.53") @requires_torch("2.8.99") @requires_experimental() diff --git a/_unittests/ut_torch_models/test_validate_whole_models1.py b/_unittests/ut_torch_models/test_validate_whole_models1.py index d1924e08..e6cabc0b 100644 --- a/_unittests/ut_torch_models/test_validate_whole_models1.py +++ b/_unittests/ut_torch_models/test_validate_whole_models1.py @@ -11,8 +11,6 @@ requires_experimental, requires_onnxscript, requires_transformers, - has_torch, - has_transformers, ) from onnx_diagnostic.torch_models.validate import ( get_inputs_for_task, @@ -24,9 +22,6 @@ from onnx_diagnostic.tasks import supported_tasks -torch29_and_tr_main = not has_torch("2.9.9") and has_transformers("4.99999") - - class TestValidateWholeModels1(ExtTestCase): def test_a_get_inputs_for_task(self): fcts = supported_tasks() @@ -205,7 +200,6 @@ def test_k_filter_inputs(self): ni, nd = filter_inputs(inputs, dynamic_shapes=ds, drop_names=["a"], model=["a", "b"]) self.assertEqual((ni, nd), (((None,), {"b": 4}), {"b": 30})) - 
@unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_torch("2.9.99") @hide_stdout() @ignore_warnings(FutureWarning) diff --git a/_unittests/ut_torch_models/test_validate_whole_models2.py b/_unittests/ut_torch_models/test_validate_whole_models2.py index bbedacd7..b0cde1f1 100644 --- a/_unittests/ut_torch_models/test_validate_whole_models2.py +++ b/_unittests/ut_torch_models/test_validate_whole_models2.py @@ -7,16 +7,11 @@ ignore_warnings, requires_torch, requires_transformers, - has_torch, - has_transformers, ) from onnx_diagnostic.torch_models.validate import validate_model -torch29_and_tr_main = not has_torch("2.9.9") and has_transformers("4.99999") - class TestValidateWholeModels2(ExtTestCase): - @unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_torch("2.9") @hide_stdout() @ignore_warnings(FutureWarning) diff --git a/_unittests/ut_torch_models/test_validate_whole_models3.py b/_unittests/ut_torch_models/test_validate_whole_models3.py index 419dbe13..d96610c6 100644 --- a/_unittests/ut_torch_models/test_validate_whole_models3.py +++ b/_unittests/ut_torch_models/test_validate_whole_models3.py @@ -5,16 +5,11 @@ ignore_warnings, requires_torch, requires_transformers, - has_torch, - has_transformers, ) from onnx_diagnostic.torch_models.validate import validate_model -torch29_and_tr_main = not has_torch("2.9.9") and has_transformers("4.99999") - class TestValidateWholeModels3(ExtTestCase): - @unittest.skipIf(torch29_and_tr_main, "combination not working") @requires_torch("2.7") @hide_stdout() @ignore_warnings(FutureWarning) diff --git a/_unittests/ut_xrun_doc/test_command_lines.py b/_unittests/ut_xrun_doc/test_command_lines.py index 449d5fac..b055763c 100644 --- a/_unittests/ut_xrun_doc/test_command_lines.py +++ b/_unittests/ut_xrun_doc/test_command_lines.py @@ -68,6 +68,60 @@ def test_parser_validate(self): text = st.getvalue() self.assertIn("mid", text) + def test_parser_validate_cmd(self): + parser = 
get_parser_validate() + args = parser.parse_args( + [ + "-m", + "arnir0/Tiny-LLM", + "--run", + "-v", + "1", + "--mop", + "cache_implementation=static", + "--iop", + "cls_cache=StaticCache", + "--patch", + ] + ) + self.assertEqual(args.mid, "arnir0/Tiny-LLM") + self.assertEqual(args.run, True) + self.assertEqual(args.patch, True) + self.assertEqual(args.verbose, 1) + self.assertEqual(args.mop, {"cache_implementation": "static"}) + self.assertEqual(args.iop, {"cls_cache": "StaticCache"}) + args = parser.parse_args( + [ + "-m", + "arnir0/Tiny-LLM", + "--run", + "-v", + "1", + "--mop", + "cache_implementation=static", + "--iop", + "cls_cache=StaticCache", + "--patch", + "patch_sympy=False", + "--patch", + "patch_torch=False", + ] + ) + self.assertEqual(args.mid, "arnir0/Tiny-LLM") + self.assertEqual(args.run, True) + self.assertEqual( + args.patch, + { + "patch_diffusers": True, + "patch_sympy": False, + "patch_torch": False, + "patch_transformers": True, + }, + ) + self.assertEqual(args.verbose, 1) + self.assertEqual(args.mop, {"cache_implementation": "static"}) + self.assertEqual(args.iop, {"cls_cache": "StaticCache"}) + def test_parser_stats(self): st = StringIO() with redirect_stdout(st): @@ -82,6 +136,26 @@ def test_parser_agg(self): text = st.getvalue() self.assertIn("--recent", text) + def test_parser_agg_cmd(self): + parser = get_parser_agg() + args = parser.parse_args( + [ + "o.xlsx", + "*.zip", + "--sbs", + "dynamo:exporter=onnx-dynamo,opt=ir,attn_impl=eager", + "--sbs", + "custom:exporter=custom,opt=default,attn_impl=eager", + ] + ) + self.assertEqual( + args.sbs, + { + "custom": {"attn_impl": "eager", "exporter": "custom", "opt": "default"}, + "dynamo": {"attn_impl": "eager", "exporter": "onnx-dynamo", "opt": "ir"}, + }, + ) + def test_parser_sbs(self): st = StringIO() with redirect_stdout(st): diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py index 37582f0b..3d6cf7b5 100644 --- 
a/onnx_diagnostic/_command_lines_parser.py +++ b/onnx_diagnostic/_command_lines_parser.py @@ -517,12 +517,12 @@ def get_parser_validate(name: str = "validate") -> ArgumentParser: nargs="*", help=textwrap.dedent( """ - Applies patches before exporting, it can be a boolean - to enable to disable the patches or be more finetuned - (default is True). It is possible to disable patch for torch - by adding: - --patch "patch_sympy=False" --patch "patch_torch=False" - """.strip( + Applies patches before exporting, it can be a boolean + to enable to disable the patches or be more finetuned + (default is True). It is possible to disable patch for torch + by adding: + --patch "patch_sympy=False" --patch "patch_torch=False" + """.strip( "\n" ) ),