fix: add cut_ep option to to_onnx and STOPAT debug switch for the Qwen2.5-VL visual export

xadupre · xadupre · commit 8bae9ab9e2bd · 2025-12-04T16:24:03.000Z
diff --git a/_unittests/ut_tasks/try_export.py b/_unittests/ut_tasks/try_export.py
@@ -57,6 +57,7 @@ def test_qwen25_vli_visual(self):
             TESTDEVICE=cuda \\
             TESTDTYPE=float16 \\
             EXPORTER=custom \\
+            CUT_EXPORTED_PROGRAM=qwen_sdpa_attention_loopmha_16 \\
             python _unittests/ut_tasks/try_export.py -k qwen25_vli_visual
 
         .. code-block:: bash
@@ -78,6 +79,9 @@ def test_qwen25_vli_visual(self):
             "float32": torch.float32,
         }[dtype]
         exporter = os.environ.get("EXPORTER", "custom")
+        cut_ep = os.environ.get("CUT_EXPORTED_PROGRAM", None)
+        if cut_ep is not None:
+            cut_ep = cut_ep.split(",")
 
         from transformers import AutoModel, AutoProcessor
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
@@ -135,15 +139,18 @@ def _config_reduction(config, task):
             grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
         )
         if not self.unit_test_going():
-            print("-- save inputs")
+            print("-- save big inputs")
             torch.save(big_inputs, self.get_dump_file("qwen25_vli_visual.inputs.big.pt"))
             torch.save(inputs, self.get_dump_file("qwen25_vli_visual.inputs.pt"))
 
         print(f"-- inputs: {self.string_type(inputs, with_shape=True)}")
         # this is too long
         model_to_export = model.visual if hasattr(model, "visual") else model.model.visual
         begin = time.perf_counter()
-        expected = model_to_export(**inputs)
+        if not os.environ.get("STOPAT", ""):
+            expected = model_to_export(**inputs)
+        else:
+            expected = None
         print(f"-- MODEL RUN IN {time.perf_counter() - begin}")
         print(f"-- expected: {self.string_type(expected, with_shape=True)}")
 
@@ -184,6 +191,8 @@ def _config_reduction(config, task):
                     verbose=1,
                     stop_if_static=2,
                 ):
+                    if expected is None:
+                        expected = model_to_export(**inputs)
                     to_onnx(
                         model_to_export,
                         kwargs=export_inputs,
@@ -195,6 +204,7 @@ def _config_reduction(config, task):
                         target_opset=24 if attention == "LOOPA24" else 22,
                         optimize=True,
                         onnx_plugs=PLUGS,
+                        cut_ep=cut_ep,
                     )
 
                 if not self.unit_test_going():
diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
@@ -1437,6 +1437,10 @@ def _size(name):
         print("-- done")
         del sess
 
+    if not args.sbs:
+        print("-- done")
+        return
+
     print(f"-- load onnx {args.onnx!r}")
     begin = time.perf_counter()
     onx = onnx.load(args.onnx)
diff --git a/onnx_diagnostic/export/api.py b/onnx_diagnostic/export/api.py
@@ -21,6 +21,7 @@ def to_onnx(
     use_control_flow_dispatcher: bool = False,
     onnx_plugs: Optional[List[EagerDirectReplacementWithOnnx]] = None,
     inline: bool = True,
+    cut_ep: Optional[List[str]] = None,
 ) -> Any:
     """
     Common API for exporters. By default, the models are optimized to use the
@@ -46,6 +47,8 @@ def to_onnx(
         custom loops (see :func:`onnx_diagnostic.export.control_flow_onnx.loop_for_onnx`)
     :param onnx_plugs: the code was modified to replace some parts with onnx translation
     :param inline: inline local functions
+    :param cut_ep: optional list of strings, cuts the exported program before exporting;
+        this is a debugging option, exporters which do not support it fail an assertion
     :return: the output of the selected exporter, usually a structure including
         an onnx model
 
@@ -140,7 +143,7 @@ def find_method(self, name: Any):
             dynamic_shapes=dynamic_shapes,
             large_model=True,
             output_dynamic_shapes=output_dynamic_shapes,
-            export_options=ExportOptions(save_ep=save_ep),
+            export_options=ExportOptions(save_ep=save_ep, cut_ep=cut_ep),
             options=options,
             inline=inline,
             dispatcher=main_dispatcher,
@@ -155,6 +158,7 @@ def find_method(self, name: Any):
         assert (
             not output_dynamic_shapes
         ), f"output_dynamic_shapes not supported for exporter={exporter!r}"
+        assert not cut_ep, f"cut_ep={cut_ep} not available with exporter={exporter!r}"
         custom_translation_table = {}
         if onnx_plugs:
             for plug in onnx_plugs:
@@ -218,6 +222,7 @@ def find_method(self, name: Any):
             f"Only a specified set of inputs is supported for exporter={exporter!r}, "
             f"but it is {list(kwargs)}"  # type: ignore[arg-type]
         )
+        assert not cut_ep, f"cut_ep={cut_ep} not available with exporter={exporter!r}"
         flat_inputs = flatten_object(kwargs, drop_keys=True)
         first = flat_inputs[0]
         first_float = [
diff --git a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py
@@ -26,6 +26,11 @@
     op = onnxscript.opset22
     op24 = onnxscript.onnx_opset.opset24
     msft_op = onnxscript.values.Opset("com.microsoft", 1)
+    # Debug switch: when STOPAT is an integer, the patched forward below leaves its layer
+    # loop once layer_num > STOPAT and returns right after the merger.
+    # An empty value means "not set" (try_export.py reads it that way); int("") would raise.
+    _stopat_env = os.environ.get("STOPAT", "").strip()
+    STOPAT = int(_stopat_env) if _stopat_env else None
 
     def _add_com_microsoft_opset(function_proto: onnx.FunctionProto) -> onnx.FunctionProto:
         opsets = {d.domain: d.version for d in function_proto.opset_import}
@@ -529,8 +534,12 @@ def forward(
                     position_embeddings=position_embeddings,
                     **kwargs,
                 )
+                if STOPAT is not None and layer_num > STOPAT:
+                    break
 
             hidden_states = self.merger(hidden_states)
+            if STOPAT is not None:
+                return hidden_states
             reverse_indices = torch.argsort(window_index)
             hidden_states = hidden_states[reverse_indices, :]
             return hidden_states