preliminary support for submodule export

xadupre · xadupre · commit 1cbe7f04773c · 2025-12-05T17:02:57.000+01:00
diff --git a/_doc/cmds/validate.rst b/_doc/cmds/validate.rst
@@ -124,7 +124,7 @@ of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`.
 
     main("validate -m arnir0/Tiny-LLM --run -v 1 --export onnx-dynamo -o dump_models --patch --opt ir --ortfusiontype ALL".split())
 
-Sdpa or Eager implementation or Use a StaticCache
+SDPA or Eager implementation or Use a StaticCache
 +++++++++++++++++++++++++++++++++++++++++++++++++
 
 Add ``--mop cache_implementation=static --iop cls_cache=StaticCache`` to use a StaticCache instead of a DynamicCache (default).
@@ -147,3 +147,22 @@ Add ``--mop attn_implementation=eager`` to explicitly select eager implementatio
                 --mop attn_implementation=eager \
                 --mop cache_implementation=static \
                 --iop cls_cache=StaticCache
+
+Frequent examples used to test
+++++++++++++++++++++++++++++++
+
+.. code-block:: bash
+
+    python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 --device cuda --dtype float16 -o dump_models --patch --opt default+onnxruntime --export custom
+
+About the exporter 'custom'
++++++++++++++++++++++++++++
+
+It is used to investigate issues or scenarios. It is usually very strict
+and fails every time it runs into an unexpected situation.
+It calls :func:`experimental_experiment.torch_interpreter.to_onnx`.
+Some useful environment variables can be set before running the command line:
+
+* ``DROPPATTERN=<pattern1,pattern2,...>``: do not apply those patterns when optimizing a model
+* ``DUMPPATTERNS=<folder>``: dumps all matched and applied nodes when a pattern is applied
+* ``PATTERN=<pattern1,pattern2,...>``: increase verbosity for specific patterns to understand why one pattern was not applied
diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py
@@ -47,6 +47,24 @@ def test_text_generation(self):
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )
 
+    @hide_stdout()
+    def test_submodule(self):
+        mid = "arnir0/Tiny-LLM::model"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-generation")
+        self.assertIn("inputs", data)
+        self.assertIn("inputs2", data)
+        self.assertIn("inputs_batch1", data)
+        self.assertIn("inputs_empty_cache", data)
+        self.assertIn((data["size"], data["n_weights"]), [(27379968, 6844992)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with torch_export_patches(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
     @hide_stdout()
     def test_text_generation_empty_cache(self):
         mid = "arnir0/Tiny-LLM"
diff --git a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py
@@ -638,12 +638,14 @@ def forward(
                     self.config._attn_implementation
                 ]
 
-            is_sdpa = (
+            is_sdpa_or_eager = (
                 attention_interface
                 is transformers.integrations.sdpa_attention.sdpa_attention_forward
                 or attention_interface is patched_sdpa_attention_forward
+                or attention_interface
+                is transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.eager_attention_forward
             )
-            if is_sdpa:
+            if is_sdpa_or_eager:
                 attn_output = qwen_sdpa_attention_versatile(
                     query_states,
                     key_states,
diff --git a/onnx_diagnostic/torch_models/code_sample.py b/onnx_diagnostic/torch_models/code_sample.py
@@ -236,7 +236,7 @@ def code_sample(
             )
         )
     """
-    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+    model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
@@ -256,6 +256,7 @@ def code_sample(
         model_kwargs=mop,
         subfolder=subfolder,
         add_second_input=False,
+        submodule=submodule,
     )
     if drop_inputs:
         update = {}
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -26,17 +26,26 @@ def _code_needing_rewriting(model: Any) -> Any:
 
 
 def _preprocess_model_id(
-    model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
-) -> Tuple[str, Optional[str], bool, bool]:
+    model_id: str,
+    subfolder: Optional[str],
+    same_as_pretrained: bool,
+    use_pretrained: bool,
+    submodule: Optional[str] = None,
+) -> Tuple[str, Optional[str], bool, bool, Optional[str]]:
+    if "::" in model_id:
+        assert (
+            not submodule
+        ), f"submodule={submodule!r} cannot be defined in model_id={model_id!r} as well"
+        model_id, submodule = model_id.split("::", maxsplit=1)
     if subfolder or "//" not in model_id:
-        return model_id, subfolder, same_as_pretrained, use_pretrained
+        return model_id, subfolder, same_as_pretrained, use_pretrained, submodule
     spl = model_id.split("//")
     if spl[-1] == "pretrained":
-        return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
+        return _preprocess_model_id("//".join(spl[:-1]), "", True, True, submodule)
     if spl[-1] in {"transformer", "vae"}:
-        # known subfolder
-        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
+        # known subfolder: still return the 5-value tuple every caller unpacks
+        return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained, submodule
-    return model_id, subfolder, same_as_pretrained, use_pretrained
+    return model_id, subfolder, same_as_pretrained, use_pretrained, submodule
 
 
 def get_untrained_model_with_inputs(
@@ -54,6 +63,7 @@ def get_untrained_model_with_inputs(
     subfolder: Optional[str] = None,
     use_only_preinstalled: bool = False,
     config_reduction: Optional[Callable[[Any, str], Dict]] = None,
+    submodule: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Gets a non initialized model similar to the original model
@@ -82,6 +92,7 @@ def get_untrained_model_with_inputs(
         <onnx_diagnostic.torch_models.hghub.reduce_model_config>`,
         this function takes a configuration and a task (string)
         as arguments
+    :param submodule: use a submodule instead of the main model
     :return: dictionary with a model, inputs, dynamic shapes, and the configuration,
         some necessary rewriting as well
 
@@ -108,11 +119,12 @@ def get_untrained_model_with_inputs(
         f"model_id={model_id!r}, preinstalled model is only available "
         f"if use_only_preinstalled is False."
     )
-    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+    model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
         use_pretrained=use_pretrained,
+        submodule=submodule,
     )
     if verbose:
         print(
@@ -147,6 +159,8 @@ def get_untrained_model_with_inputs(
         if verbose:
             print(f"[get_untrained_model_with_inputs] architecture={arch!r}")
             print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
+            if submodule:
+                print(f"[get_untrained_model_with_inputs] submodule={submodule!r}")
         if task is None:
             task = task_from_arch(arch, model_id=model_id, subfolder=subfolder)
         if verbose:
@@ -357,6 +371,19 @@ def get_untrained_model_with_inputs(
     if diff_config is not None:
         res["dump_info"] = dict(config_diff=diff_config)
 
+    if submodule:
+        path = submodule.split("::") if "::" in submodule else [submodule]
+        for p in path:
+            assert hasattr(model, p), (
+                f"Unable to find submodule {p!r} in in class {type(model)}, "
+                f"submodule={submodule!r}, possible candidates: "
+                f"{[k for k in dir(model) if isinstance(getattr(model, k), torch.nn.Module)]}"
+            )
+            model = getattr(model, p)
+
+    if verbose:
+        print(f"[get_untrained_model_with_inputs] model class={model.__class__.__name__!r}")
+
     sizes = compute_model_size(model)
     res["model"] = model
     res["configuration"] = config
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
@@ -349,13 +349,15 @@ def _prepare_validation(
     verbose,
     output_names,
     dump_folder,
+    submodule,
 ):
     main_validation_begin = time.perf_counter()
-    model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
+    model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
         model_id,
         subfolder,
         same_as_pretrained=same_as_pretrained,
         use_pretrained=use_pretrained,
+        submodule=submodule,
     )
     time_preprocess_model_id = time.perf_counter() - main_validation_begin
     patch_kwargs = make_patch_kwargs(patch=patch, rewrite=rewrite)
@@ -364,6 +366,7 @@ def _prepare_validation(
     summary.update(
         dict(
             version_model_id=model_id,
+            version_submodule=submodule,
             version_do_run=str(do_run),
             version_dtype=str(dtype or ""),
             version_device=str(device or ""),
@@ -444,6 +447,7 @@ def _prepare_validation(
         dump_folder,
         folder_name,
         patch_kwargs,
+        submodule,
     )
 
 
@@ -460,6 +464,7 @@ def _get_untrained_model_with_inputs(
     inputs2,
     quiet,
     dump_folder,
+    submodule,
 ):
     iop = input_options or {}
     mop = model_options or {}
@@ -480,6 +485,7 @@ def _get_untrained_model_with_inputs(
                     model_kwargs=mop,
                     subfolder=sub,
                     add_second_input=i2,
+                    submodule=submodule,
                 )
             )
         ),
@@ -842,6 +848,7 @@ def validate_model(
     ort_logs: bool = False,
     quiet_input_sets: Optional[Set[str]] = None,
     save_ep: Optional[str] = None,
+    submodule: Optional[str] = None,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Validates a model.
@@ -902,6 +909,7 @@ def validate_model(
         even if quiet is False
     :param save_ep: if not empty, this can be used to save the input sets and
         the exported program
+    :param submodule: to test not the model but a submodule of this model
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
 
@@ -966,6 +974,7 @@ def validate_model(
         use_pretrained=use_pretrained,
         same_as_pretrained=same_as_pretrained,
         save_ep=save_ep,
+        submodule=submodule,
     )
     if dump_folder:
         with open(dump_stats, "w") as f:
@@ -1053,6 +1062,7 @@ def _validate_model_step1(
     use_pretrained,
     same_as_pretrained,
     save_ep,
+    submodule,
 ):
     assert not do_same or do_run, (
         f"Discrepancies cannot be measured if the model is not run, "
@@ -1067,6 +1077,7 @@ def _validate_model_step1(
         dump_folder,
         folder_name,
         patch_kwargs,
+        submodule,
     ) = _prepare_validation(
         model_id=model_id,
         subfolder=subfolder,
@@ -1093,6 +1104,7 @@ def _validate_model_step1(
         verbose=verbose,
         output_names=output_names,
         dump_folder=dump_folder,
+        submodule=submodule,
     )
 
     data, iop, mop = _get_untrained_model_with_inputs(
@@ -1108,6 +1120,7 @@ def _validate_model_step1(
         inputs2=inputs2,
         quiet=quiet,
         dump_folder=dump_folder,
+        submodule=submodule,
     )
 
     second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]

Original file line number	Diff line number	Diff line change
`@@ -236,7 +236,7 @@ def code_sample(`
`236`	`236`	`)`
`237`	`237`	`)`
`238`	`238`	`"""`
`239`		`- model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(`
	`239`	`+ model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(`
`240`	`240`	`model_id,`
`241`	`241`	`subfolder,`
`242`	`242`	`same_as_pretrained=same_as_pretrained,`
`@@ -256,6 +256,7 @@ def code_sample(`
`256`	`256`	`model_kwargs=mop,`
`257`	`257`	`subfolder=subfolder,`
`258`	`258`	`add_second_input=False,`
	`259`	`+ submodule=submodule,`
`259`	`260`	`)`
`260`	`261`	`if drop_inputs:`
`261`	`262`	`update = {}`