diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index ed2b273f..a5495cf9 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -4,6 +4,7 @@ Change Logs 0.8.4 +++++ +* :pr:`341`: preliminary support to export a submodule * :pr:`340`: supports devices in onnx plugs * :pr:`338`: fixes ReplayConfiguration.dump, add function to select of part of a model * :pr:`337`: fixes extract_subset_of_nodes diff --git a/_doc/cmds/validate.rst b/_doc/cmds/validate.rst index e27df801..6ac93064 100644 --- a/_doc/cmds/validate.rst +++ b/_doc/cmds/validate.rst @@ -124,7 +124,7 @@ of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`. main("validate -m arnir0/Tiny-LLM --run -v 1 --export onnx-dynamo -o dump_models --patch --opt ir --ortfusiontype ALL".split()) -Sdpa or Eager implementation or Use a StaticCache +SDPA or Eager implementation or Use a StaticCache +++++++++++++++++++++++++++++++++++++++++++++++++ Add ``--mop cache_implementation=static --iop cls_cache=StaticCache`` to use a StaticCache instead of a DynamicCache (default). @@ -147,3 +147,22 @@ Add ``--mop attn_implementation=eager`` to explicitly select eager implementatio --mop attn_implementation=eager \ --mop cache_implementation=static \ --iop cls_cache=StaticCache + +Frequent examples used to test +++++++++++++++++++++++++++++++ + +.. code-block:: bash + + python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 --device cuda --dtype float16 -o dump_models --patch --opt default+onnxruntime --export custom + +About the exporter 'custom' ++++++++++++++++++++++++++++ + +It is used to investigate issues or scenarios. It is usually very strict +and fails whenever it runs into an unexpected situation. +It calls :func:`experimental_experiment.torch_interpreter.to_onnx`. +Some useful environment variables can be set before running the command line: 
+ +* ``DROPPATTERN=``: do not apply those patterns when optimizing a model +* ``DUMPPATTERNS=``: dumps all matched and applied nodes when a pattern is applied +* ``PATTERN=``: increase verbosity for specific patterns to understand why one pattern was not applied diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py index 9599359a..63a58358 100644 --- a/_unittests/ut_tasks/test_tasks.py +++ b/_unittests/ut_tasks/test_tasks.py @@ -47,6 +47,24 @@ def test_text_generation(self): model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False ) + @hide_stdout() + def test_submodule(self): + mid = "arnir0/Tiny-LLM::model" + data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True) + self.assertEqual(data["task"], "text-generation") + self.assertIn("inputs", data) + self.assertIn("inputs2", data) + self.assertIn("inputs_batch1", data) + self.assertIn("inputs_empty_cache", data) + self.assertIn((data["size"], data["n_weights"]), [(27379968, 6844992)]) + model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"] + model(**inputs) + model(**data["inputs2"]) + with torch_export_patches(patch_transformers=True, verbose=10): + torch.export.export( + model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False + ) + @hide_stdout() def test_text_generation_empty_cache(self): mid = "arnir0/Tiny-LLM" diff --git a/onnx_diagnostic/export/api.py b/onnx_diagnostic/export/api.py index 260a5d6b..6f14eaec 100644 --- a/onnx_diagnostic/export/api.py +++ b/onnx_diagnostic/export/api.py @@ -3,6 +3,52 @@ from .onnx_plug import EagerDirectReplacementWithOnnx +def get_main_dispatcher( + use_control_flow_dispatcher: bool = False, + onnx_plugs: Optional[List[EagerDirectReplacementWithOnnx]] = None, +) -> Any: # Dispatcher + """Creates a custom dispatcher for the custom exporter.""" + from experimental_experiment.torch_interpreter import Dispatcher + + if use_control_flow_dispatcher: + from .control_flow_onnx 
import create_global_dispatcher + + control_flow_dispatcher = create_global_dispatcher() + else: + control_flow_dispatcher = None + + class MainDispatcher(Dispatcher): + def __init__(self, previous_dispatcher=None): + super().__init__({}) + self.previous_dispatcher = previous_dispatcher + + @property + def supported(self): + if self.previous_dispatcher: + return set(self.registered_functions) | self.previous_dispatcher.supported + return set(self.registered_functions) + + def find_function(self, name: Any): + if self.previous_dispatcher: + find = self.previous_dispatcher.find_function(name) + if find: + return find + return Dispatcher.find_function(self, name) + + def find_method(self, name: Any): + if self.previous_dispatcher: + find = self.previous_dispatcher.find_method(name) + if find: + return find + return Dispatcher.find_method(self, name) + + main_dispatcher = MainDispatcher(control_flow_dispatcher) + if onnx_plugs: + for plug in onnx_plugs: + main_dispatcher.registered_functions[plug.target_name] = plug.custom_converter() + return main_dispatcher + + def to_onnx( mod: Union["torch.nn.Module", "torch.fx.GraphModule"], # noqa: F821 args: Optional[Sequence["torch.Tensor"]] = None, # noqa: F821 @@ -82,51 +128,11 @@ def to_onnx( options = exporter_kwargs.pop("options", None) if options is None: options = OptimizationOptions(patterns="default+onnxruntime") - if onnx_plugs or use_control_flow_dispatcher: - from experimental_experiment.torch_interpreter import Dispatcher - - if use_control_flow_dispatcher: - from .control_flow_onnx import create_global_dispatcher - - control_flow_dispatcher = create_global_dispatcher() - else: - control_flow_dispatcher = None - - class MainDispatcher(Dispatcher): - def __init__(self, previous_dispatcher=None): - super().__init__({}) - self.previous_dispatcher = previous_dispatcher - - @property - def supported(self): - if self.previous_dispatcher: - return ( - set(self.registered_functions) | self.previous_dispatcher.supported - ) 
- return set(self.registered_functions) - - def find_function(self, name: Any): - if self.previous_dispatcher: - find = self.previous_dispatcher.find_function(name) - if find: - return find - return Dispatcher.find_function(self, name) - - def find_method(self, name: Any): - if self.previous_dispatcher: - find = self.previous_dispatcher.find_method(name) - if find: - return find - return Dispatcher.find_method(self, name) - - main_dispatcher = MainDispatcher(control_flow_dispatcher) - if onnx_plugs: - for plug in onnx_plugs: - main_dispatcher.registered_functions[plug.target_name] = ( - plug.custom_converter() - ) - else: - main_dispatcher = None + main_dispatcher = ( + get_main_dispatcher(use_control_flow_dispatcher, onnx_plugs) + if onnx_plugs or use_control_flow_dispatcher + else None + ) return _to_onnx( mod, @@ -181,9 +187,17 @@ def find_method(self, name: Any): import onnx_ir as ir import onnx_ir.passes.common as common_passes + opset = ( + 18 + if target_opset is None + else (target_opset if isinstance(target_opset, int) else target_opset[""]) + ) + irfunctions = [ ir.from_proto( - plug.get_function_proto(*flatten_object((args, kwargs), drop_keys=True)) + plug.get_function_proto( + opset, *flatten_object((args, kwargs), drop_keys=True) + ) ) for plug in onnx_plugs ] diff --git a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py index 98c1e29b..6fb742af 100644 --- a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py +++ b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py @@ -262,12 +262,14 @@ def qwen_version_selector(opset: int, *args: torch.Tensor) -> Tuple[str, torch.d itype = torch_dtype_to_onnx_dtype(dtype) if strategy is not None: return strategy, itype - if dtype == torch.float32: + if dtype == torch.float32 or itype == onnx.TensorProto.FLOAT: if opset >= 24: return "LOOPA24", itype return "LOOPMHA", 
itype - if dtype == torch.float16: - if first_tensor.is_cuda: + if dtype == torch.float16 or itype == onnx.TensorProto.FLOAT16: + # first_tensor may be a SymbolicTensor (onnx). + # is_cuda is not available. + if hasattr(first_tensor, "is_cuda") and first_tensor.is_cuda: return "PACKED", itype return "LOOPMHA", itype raise AssertionError( @@ -638,12 +640,14 @@ def forward( self.config._attn_implementation ] - is_sdpa = ( + is_sdpa_or_eager = ( attention_interface is transformers.integrations.sdpa_attention.sdpa_attention_forward or attention_interface is patched_sdpa_attention_forward + or attention_interface + is transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.eager_attention_forward ) - if is_sdpa: + if is_sdpa_or_eager: attn_output = qwen_sdpa_attention_versatile( query_states, key_states, diff --git a/onnx_diagnostic/torch_models/code_sample.py b/onnx_diagnostic/torch_models/code_sample.py index cdcd8970..61053aaf 100644 --- a/onnx_diagnostic/torch_models/code_sample.py +++ b/onnx_diagnostic/torch_models/code_sample.py @@ -236,7 +236,7 @@ def code_sample( ) ) """ - model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id( + model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id( model_id, subfolder, same_as_pretrained=same_as_pretrained, @@ -256,6 +256,7 @@ def code_sample( model_kwargs=mop, subfolder=subfolder, add_second_input=False, + submodule=submodule, ) if drop_inputs: update = {} diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index 62052bf7..8a6df2e3 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -26,17 +26,26 @@ def _code_needing_rewriting(model: Any) -> Any: def _preprocess_model_id( - model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool -) -> Tuple[str, Optional[str], bool, bool]: + model_id: str, + subfolder: 
Optional[str], + same_as_pretrained: bool, + use_pretrained: bool, + submodule: Optional[str] = None, +) -> Tuple[str, Optional[str], bool, bool, Optional[str]]: + if "::" in model_id: + assert ( + not submodule + ), f"submodule={submodule!r} cannot be defined in model_id={model_id!r} as well" + model_id, submodule = model_id.split("::", maxsplit=1) if subfolder or "//" not in model_id: - return model_id, subfolder, same_as_pretrained, use_pretrained + return model_id, subfolder, same_as_pretrained, use_pretrained, submodule spl = model_id.split("//") if spl[-1] == "pretrained": - return _preprocess_model_id("//".join(spl[:-1]), "", True, True) + return _preprocess_model_id("//".join(spl[:-1]), "", True, True, submodule) if spl[-1] in {"transformer", "vae"}: # known subfolder - return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained - return model_id, subfolder, same_as_pretrained, use_pretrained + return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained, submodule + return model_id, subfolder, same_as_pretrained, use_pretrained, submodule def get_untrained_model_with_inputs( @@ -54,6 +63,7 @@ def get_untrained_model_with_inputs( subfolder: Optional[str] = None, use_only_preinstalled: bool = False, config_reduction: Optional[Callable[[Any, str], Dict]] = None, + submodule: Optional[str] = None, ) -> Dict[str, Any]: """ Gets a non initialized model similar to the original model @@ -82,6 +92,7 @@ def get_untrained_model_with_inputs( `, this function takes a configuration and a task (string) as arguments + :param submodule: use a submodule instead of the main model :return: dictionary with a model, inputs, dynamic shapes, and the configuration, some necessary rewriting as well @@ -108,11 +119,12 @@ def get_untrained_model_with_inputs( f"model_id={model_id!r}, preinstalled model is only available " f"if use_only_preinstalled is False." 
) - model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id( + model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id( model_id, subfolder, same_as_pretrained=same_as_pretrained, use_pretrained=use_pretrained, + submodule=submodule, ) if verbose: print( @@ -147,6 +159,8 @@ def get_untrained_model_with_inputs( if verbose: print(f"[get_untrained_model_with_inputs] architecture={arch!r}") print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}") + if submodule: + print(f"[get_untrained_model_with_inputs] submodule={submodule!r}") if task is None: task = task_from_arch(arch, model_id=model_id, subfolder=subfolder) if verbose: @@ -357,6 +371,19 @@ def get_untrained_model_with_inputs( if diff_config is not None: res["dump_info"] = dict(config_diff=diff_config) + if submodule: + path = submodule.split("::") if "::" in submodule else [submodule] + for p in path: + assert hasattr(model, p), ( + f"Unable to find submodule {p!r} in in class {type(model)}, " + f"submodule={submodule!r}, possible candidates: " + f"{[k for k in dir(model) if isinstance(getattr(model, k), torch.nn.Module)]}" + ) + model = getattr(model, p) + + if verbose: + print(f"[get_untrained_model_with_inputs] model class={model.__class__.__name__!r}") + sizes = compute_model_size(model) res["model"] = model res["configuration"] = config diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py index 962eacc9..644d158f 100644 --- a/onnx_diagnostic/torch_models/validate.py +++ b/onnx_diagnostic/torch_models/validate.py @@ -349,13 +349,15 @@ def _prepare_validation( verbose, output_names, dump_folder, + submodule, ): main_validation_begin = time.perf_counter() - model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id( + model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id( model_id, subfolder, same_as_pretrained=same_as_pretrained, 
use_pretrained=use_pretrained, + submodule=submodule, ) time_preprocess_model_id = time.perf_counter() - main_validation_begin patch_kwargs = make_patch_kwargs(patch=patch, rewrite=rewrite) @@ -364,6 +366,7 @@ def _prepare_validation( summary.update( dict( version_model_id=model_id, + version_submodule=submodule, version_do_run=str(do_run), version_dtype=str(dtype or ""), version_device=str(device or ""), @@ -444,6 +447,7 @@ def _prepare_validation( dump_folder, folder_name, patch_kwargs, + submodule, ) @@ -460,6 +464,7 @@ def _get_untrained_model_with_inputs( inputs2, quiet, dump_folder, + submodule, ): iop = input_options or {} mop = model_options or {} @@ -480,6 +485,7 @@ def _get_untrained_model_with_inputs( model_kwargs=mop, subfolder=sub, add_second_input=i2, + submodule=submodule, ) ) ), @@ -842,6 +848,7 @@ def validate_model( ort_logs: bool = False, quiet_input_sets: Optional[Set[str]] = None, save_ep: Optional[str] = None, + submodule: Optional[str] = None, ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]: """ Validates a model. 
@@ -902,6 +909,7 @@ def validate_model( even if quiet is False :param save_ep: if not empty, this can be used to save the input sets and the exported program + :param submodule: to test not the model but a submodule of this model :return: two dictionaries, one with some metrics, another one with whatever the function produces @@ -966,6 +974,7 @@ def validate_model( use_pretrained=use_pretrained, same_as_pretrained=same_as_pretrained, save_ep=save_ep, + submodule=submodule, ) if dump_folder: with open(dump_stats, "w") as f: @@ -1053,6 +1062,7 @@ def _validate_model_step1( use_pretrained, same_as_pretrained, save_ep, + submodule, ): assert not do_same or do_run, ( f"Discrepancies cannot be measured if the model is not run, " @@ -1067,6 +1077,7 @@ def _validate_model_step1( dump_folder, folder_name, patch_kwargs, + submodule, ) = _prepare_validation( model_id=model_id, subfolder=subfolder, @@ -1093,6 +1104,7 @@ def _validate_model_step1( verbose=verbose, output_names=output_names, dump_folder=dump_folder, + submodule=submodule, ) data, iop, mop = _get_untrained_model_with_inputs( @@ -1108,6 +1120,7 @@ def _validate_model_step1( inputs2=inputs2, quiet=quiet, dump_folder=dump_folder, + submodule=submodule, ) second_input_keys = [k for k in data if k.startswith("inputs") and k != "inputs"]