Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.8.4
+++++

* :pr:`341`: preliminary support to export submodule
* :pr:`340`: supports devices in onnx plugs
* :pr:`338`: fixes ReplayConfiguration.dump, adds a function to select a part of a model
* :pr:`337`: fixes extract_subset_of_nodes
Expand Down
21 changes: 20 additions & 1 deletion _doc/cmds/validate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`.

main("validate -m arnir0/Tiny-LLM --run -v 1 --export onnx-dynamo -o dump_models --patch --opt ir --ortfusiontype ALL".split())

Sdpa or Eager implementation or Use a StaticCache
SDPA or Eager implementation or Use a StaticCache
+++++++++++++++++++++++++++++++++++++++++++++++++

Add ``--mop cache_implementation=static --iop cls_cache=StaticCache`` to use a StaticCache instead of a DynamicCache (default).
Expand All @@ -147,3 +147,22 @@ Add ``--mop attn_implementation=eager`` to explicitly select eager implementatio
--mop attn_implementation=eager \
--mop cache_implementation=static \
--iop cls_cache=StaticCache

Frequent examples used to test
++++++++++++++++++++++++++++++

.. code-block:: bash

python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 --device cuda --dtype float16 -o dump_models --patch --opt default+onnxruntime --export custom

About the exporter 'custom'
+++++++++++++++++++++++++++

It is used to investigate issues or scenarios. It is usually very strict
and fails every time it runs into an unexpected situation.
It calls :func:`experimental_experiment.torch_interpreter.to_onnx`.
Some useful environment variables can be set before running the command line:

* ``DROPPATTERN=<pattern1,pattern2,...>``: does not apply those patterns when optimizing a model
* ``DUMPPATTERNS=<folder>``: dumps all matched and applied nodes when a pattern is applied
* ``PATTERN=<pattern1,pattern2,...>``: increases verbosity for specific patterns to understand why one pattern was not applied
18 changes: 18 additions & 0 deletions _unittests/ut_tasks/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ def test_text_generation(self):
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
def test_submodule(self):
    # The suffix after "::" is passed as part of the model id; the returned
    # entry data["model"] is then called twice and exported.
    model_id = "arnir0/Tiny-LLM::model"
    data = get_untrained_model_with_inputs(model_id, verbose=1, add_second_input=True)
    self.assertEqual(data["task"], "text-generation")
    # Every flavour of inputs must be part of the returned dictionary.
    for expected_key in ("inputs", "inputs2", "inputs_batch1", "inputs_empty_cache"):
        self.assertIn(expected_key, data)
    self.assertIn((data["size"], data["n_weights"]), [(27379968, 6844992)])
    model = data["model"]
    inputs = data["inputs"]
    ds = data["dynamic_shapes"]
    # Runs the model on both sets of inputs before exporting it.
    model(**inputs)
    model(**data["inputs2"])
    with torch_export_patches(patch_transformers=True, verbose=10):
        torch.export.export(
            model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
        )

@hide_stdout()
def test_text_generation_empty_cache(self):
mid = "arnir0/Tiny-LLM"
Expand Down
106 changes: 60 additions & 46 deletions onnx_diagnostic/export/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,52 @@
from .onnx_plug import EagerDirectReplacementWithOnnx


def get_main_dispatcher(
    use_control_flow_dispatcher: bool = False,
    onnx_plugs: Optional[List[EagerDirectReplacementWithOnnx]] = None,
) -> Any:  # Dispatcher
    """
    Builds the dispatcher given to the custom exporter.

    :param use_control_flow_dispatcher: if True, the dispatcher returned by
        ``create_global_dispatcher`` is chained as the *previous* dispatcher
        and is queried before the functions registered in the main one
    :param onnx_plugs: optional plugs, the converter returned by
        ``plug.custom_converter()`` is registered under ``plug.target_name``
    :return: an instance of a local subclass of ``Dispatcher``
    """
    from experimental_experiment.torch_interpreter import Dispatcher

    control_flow_dispatcher = None
    if use_control_flow_dispatcher:
        from .control_flow_onnx import create_global_dispatcher

        control_flow_dispatcher = create_global_dispatcher()

    class MainDispatcher(Dispatcher):
        # Dispatcher which gives priority to an optional previous dispatcher.

        def __init__(self, previous_dispatcher=None):
            super().__init__({})
            self.previous_dispatcher = previous_dispatcher

        @property
        def supported(self):
            # Union of the names registered here and the names
            # supported by the previous dispatcher if there is one.
            previous = self.previous_dispatcher
            if not previous:
                return set(self.registered_functions)
            return set(self.registered_functions) | previous.supported

        def find_function(self, name: Any):
            # The previous dispatcher answers first, this one is the fallback.
            previous = self.previous_dispatcher
            found = previous.find_function(name) if previous else None
            return found or Dispatcher.find_function(self, name)

        def find_method(self, name: Any):
            # Same lookup order as find_function.
            previous = self.previous_dispatcher
            found = previous.find_method(name) if previous else None
            return found or Dispatcher.find_method(self, name)

    dispatcher = MainDispatcher(control_flow_dispatcher)
    for plug in onnx_plugs or ():
        dispatcher.registered_functions[plug.target_name] = plug.custom_converter()
    return dispatcher


def to_onnx(
mod: Union["torch.nn.Module", "torch.fx.GraphModule"], # noqa: F821
args: Optional[Sequence["torch.Tensor"]] = None, # noqa: F821
Expand Down Expand Up @@ -82,51 +128,11 @@ def to_onnx(
options = exporter_kwargs.pop("options", None)
if options is None:
options = OptimizationOptions(patterns="default+onnxruntime")
if onnx_plugs or use_control_flow_dispatcher:
from experimental_experiment.torch_interpreter import Dispatcher

if use_control_flow_dispatcher:
from .control_flow_onnx import create_global_dispatcher

control_flow_dispatcher = create_global_dispatcher()
else:
control_flow_dispatcher = None

class MainDispatcher(Dispatcher):
def __init__(self, previous_dispatcher=None):
super().__init__({})
self.previous_dispatcher = previous_dispatcher

@property
def supported(self):
if self.previous_dispatcher:
return (
set(self.registered_functions) | self.previous_dispatcher.supported
)
return set(self.registered_functions)

def find_function(self, name: Any):
if self.previous_dispatcher:
find = self.previous_dispatcher.find_function(name)
if find:
return find
return Dispatcher.find_function(self, name)

def find_method(self, name: Any):
if self.previous_dispatcher:
find = self.previous_dispatcher.find_method(name)
if find:
return find
return Dispatcher.find_method(self, name)

main_dispatcher = MainDispatcher(control_flow_dispatcher)
if onnx_plugs:
for plug in onnx_plugs:
main_dispatcher.registered_functions[plug.target_name] = (
plug.custom_converter()
)
else:
main_dispatcher = None
main_dispatcher = (
get_main_dispatcher(use_control_flow_dispatcher, onnx_plugs)
if onnx_plugs or use_control_flow_dispatcher
else None
)

return _to_onnx(
mod,
Expand Down Expand Up @@ -181,9 +187,17 @@ def find_method(self, name: Any):
import onnx_ir as ir
import onnx_ir.passes.common as common_passes

opset = (
18
if target_opset is None
else (target_opset if isinstance(target_opset, int) else target_opset[""])
)

irfunctions = [
ir.from_proto(
plug.get_function_proto(*flatten_object((args, kwargs), drop_keys=True))
plug.get_function_proto(
opset, *flatten_object((args, kwargs), drop_keys=True)
)
)
for plug in onnx_plugs
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,14 @@ def qwen_version_selector(opset: int, *args: torch.Tensor) -> Tuple[str, torch.d
itype = torch_dtype_to_onnx_dtype(dtype)
if strategy is not None:
return strategy, itype
if dtype == torch.float32:
if dtype == torch.float32 or itype == onnx.TensorProto.FLOAT:
if opset >= 24:
return "LOOPA24", itype
return "LOOPMHA", itype
if dtype == torch.float16:
if first_tensor.is_cuda:
if dtype == torch.float16 or itype == onnx.TensorProto.FLOAT16:
# first_tensor may be a SymbolicTensor (onnx).
# is_cuda is not available.
if hasattr(first_tensor, "is_cuda") and first_tensor.is_cuda:
return "PACKED", itype
return "LOOPMHA", itype
raise AssertionError(
Expand Down Expand Up @@ -638,12 +640,14 @@ def forward(
self.config._attn_implementation
]

is_sdpa = (
is_sdpa_or_eager = (
attention_interface
is transformers.integrations.sdpa_attention.sdpa_attention_forward
or attention_interface is patched_sdpa_attention_forward
or attention_interface
is transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.eager_attention_forward
)
if is_sdpa:
if is_sdpa_or_eager:
attn_output = qwen_sdpa_attention_versatile(
query_states,
key_states,
Expand Down
3 changes: 2 additions & 1 deletion onnx_diagnostic/torch_models/code_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def code_sample(
)
)
"""
model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
model_id,
subfolder,
same_as_pretrained=same_as_pretrained,
Expand All @@ -256,6 +256,7 @@ def code_sample(
model_kwargs=mop,
subfolder=subfolder,
add_second_input=False,
submodule=submodule,
)
if drop_inputs:
update = {}
Expand Down
41 changes: 34 additions & 7 deletions onnx_diagnostic/torch_models/hghub/model_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,26 @@ def _code_needing_rewriting(model: Any) -> Any:


def _preprocess_model_id(
model_id: str, subfolder: Optional[str], same_as_pretrained: bool, use_pretrained: bool
) -> Tuple[str, Optional[str], bool, bool]:
model_id: str,
subfolder: Optional[str],
same_as_pretrained: bool,
use_pretrained: bool,
submodule: Optional[str] = None,
) -> Tuple[str, Optional[str], bool, bool, Optional[str]]:
if "::" in model_id:
assert (
not submodule
), f"submodule={submodule!r} cannot be defined in model_id={model_id!r} as well"
model_id, submodule = model_id.split("::", maxsplit=1)
if subfolder or "//" not in model_id:
return model_id, subfolder, same_as_pretrained, use_pretrained
return model_id, subfolder, same_as_pretrained, use_pretrained, submodule
spl = model_id.split("//")
if spl[-1] == "pretrained":
return _preprocess_model_id("//".join(spl[:-1]), "", True, True)
return _preprocess_model_id("//".join(spl[:-1]), "", True, True, submodule)
if spl[-1] in {"transformer", "vae"}:
# known subfolder
return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained
return model_id, subfolder, same_as_pretrained, use_pretrained
return "//".join(spl[:-1]), spl[-1], same_as_pretrained, use_pretrained, submodule
return model_id, subfolder, same_as_pretrained, use_pretrained, submodule


def get_untrained_model_with_inputs(
Expand All @@ -54,6 +63,7 @@ def get_untrained_model_with_inputs(
subfolder: Optional[str] = None,
use_only_preinstalled: bool = False,
config_reduction: Optional[Callable[[Any, str], Dict]] = None,
submodule: Optional[str] = None,
) -> Dict[str, Any]:
"""
Gets a non initialized model similar to the original model
Expand Down Expand Up @@ -82,6 +92,7 @@ def get_untrained_model_with_inputs(
<onnx_diagnostic.torch_models.hghub.reduce_model_config>`,
this function takes a configuration and a task (string)
as arguments
:param submodule: use a submodule instead of the main model
:return: dictionary with a model, inputs, dynamic shapes, and the configuration,
some necessary rewriting as well

Expand All @@ -108,11 +119,12 @@ def get_untrained_model_with_inputs(
f"model_id={model_id!r}, preinstalled model is only available "
f"if use_only_preinstalled is False."
)
model_id, subfolder, same_as_pretrained, use_pretrained = _preprocess_model_id(
model_id, subfolder, same_as_pretrained, use_pretrained, submodule = _preprocess_model_id(
model_id,
subfolder,
same_as_pretrained=same_as_pretrained,
use_pretrained=use_pretrained,
submodule=submodule,
)
if verbose:
print(
Expand Down Expand Up @@ -147,6 +159,8 @@ def get_untrained_model_with_inputs(
if verbose:
print(f"[get_untrained_model_with_inputs] architecture={arch!r}")
print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
if submodule:
print(f"[get_untrained_model_with_inputs] submodule={submodule!r}")
if task is None:
task = task_from_arch(arch, model_id=model_id, subfolder=subfolder)
if verbose:
Expand Down Expand Up @@ -357,6 +371,19 @@ def get_untrained_model_with_inputs(
if diff_config is not None:
res["dump_info"] = dict(config_diff=diff_config)

if submodule:
path = submodule.split("::") if "::" in submodule else [submodule]
for p in path:
assert hasattr(model, p), (
f"Unable to find submodule {p!r} in in class {type(model)}, "
f"submodule={submodule!r}, possible candidates: "
f"{[k for k in dir(model) if isinstance(getattr(model, k), torch.nn.Module)]}"
)
model = getattr(model, p)

if verbose:
print(f"[get_untrained_model_with_inputs] model class={model.__class__.__name__!r}")

sizes = compute_model_size(model)
res["model"] = model
res["configuration"] = config
Expand Down
Loading
Loading