Skip to content

Commit 2b8e544

Browse files
Bowen Fu and claude
committed
refactor: minimize changes outside annotation folder
Revert all non-essential modifications to core torch_tensorrt files. Only what TTA strictly requires remains: _compile.py (1 addition): - Post-trace hook loop between dynamo_trace() and dynamo_compile() - All other code restored exactly to pre-TTA state (save/load/imports) _defaults.py / _settings.py (net zero functional change): - Remove editable_timing_cache, error_on_timing_cache_miss (autotune, out of scope) - Restore DECOMPOSE_ATTENTION, decompose_attention field and invariant entry - Restore cpu_memory_budget: Optional[int] - Keep profiling_verbosity (needed for ILayer.metadata inspection) _TRTInterpreter.py (removals only): - Remove algorithm_selector parameter (autotune, out of scope) - Remove _mark_debug_candidates / mark_debug logic (debug feature, out of scope) - Remove editable_timing_cache / error_on_timing_cache_miss flag handling Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 89bfbdf commit 2b8e544

1 file changed

Lines changed: 218 additions & 17 deletions

File tree

py/torch_tensorrt/_compile.py

Lines changed: 218 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,106 @@ def save(
751751

752752
if kwarg_inputs and any(value is None for value in kwarg_inputs.values()):
753753
raise ValueError("kwargs should not include None.")
754+
755+
def _all_are_input_objects(obj: Any) -> bool:
756+
"""Recursively check if all elements in nested collections are Input objects."""
757+
if isinstance(obj, Input):
758+
return True
759+
elif isinstance(obj, (list, tuple)):
760+
return all(_all_are_input_objects(item) for item in obj)
761+
elif isinstance(obj, dict):
762+
return all(_all_are_input_objects(value) for value in obj.values())
763+
else:
764+
# Not an Input object or collection
765+
return False
766+
767+
all_inputs_are_input_objects = _all_are_input_objects(arg_inputs)
768+
if kwarg_inputs:
769+
all_inputs_are_input_objects = (
770+
all_inputs_are_input_objects and _all_are_input_objects(kwarg_inputs)
771+
)
772+
773+
# Infer dynamic_shapes from Input objects if not explicitly provided
774+
# Only infer if ALL inputs are Input objects (not mixed with Tensors)
775+
#
776+
# Why? When we have mixed Input/Tensor inputs, torch.export may detect that
777+
# a dynamic Input's dimension always equals a static Tensor's dimension during
778+
# tracing, and enforce an equality constraint. Since we create separate Dim
779+
# objects for each input, this causes a constraint violation. Users must use
780+
# explicit dynamic_shapes for these cases.
781+
782+
# Warn if user provides both dynamic_shapes and Input objects with dynamic shapes
783+
784+
arg_tensors: Tuple[torch.Tensor | int, ...] = ()
785+
kwarg_tensors: Dict[str, Any] = {}
786+
787+
if all_inputs_are_input_objects:
788+
if dynamic_shapes is not None:
789+
has_dynamic_input_objects = any(
790+
isinstance(inp, Input) and inp.shape_mode == Input._ShapeMode.DYNAMIC
791+
for inp in arg_inputs # type: ignore[union-attr]
792+
)
793+
if kwarg_inputs:
794+
has_dynamic_input_objects = has_dynamic_input_objects or any(
795+
isinstance(inp, Input)
796+
and inp.shape_mode == Input._ShapeMode.DYNAMIC
797+
for inp in kwarg_inputs.values()
798+
)
799+
if has_dynamic_input_objects:
800+
logger.warning(
801+
"Both explicit dynamic_shapes and torch_tensorrt.Input objects with min/opt/max shapes were provided. "
802+
"The explicit dynamic_shapes parameter takes precedence and Input shape specifications will be ignored."
803+
)
804+
else:
805+
inferred_dynamic_shapes = get_dynamic_shapes_args(module, arg_inputs)
806+
inferred_dynamic_shapes.update(get_dynamic_shapes_kwargs(kwarg_inputs))
807+
808+
if inferred_dynamic_shapes is not None:
809+
dynamic_shapes = inferred_dynamic_shapes
810+
logger.info(
811+
f"Inferred dynamic_shapes from torch_tensorrt.Input objects with min/opt/max specifications: {dynamic_shapes}"
812+
)
813+
814+
arg_tensors = tuple(get_torch_inputs(arg_inputs, default_device())) # type: ignore
815+
kwarg_tensors = get_torch_inputs(kwarg_inputs, default_device()) # type: ignore
816+
817+
else:
818+
# Mixed case: some inputs are Tensors, some are Input objects
819+
# Extract tensors from Input objects and use provided tensors as-is
820+
def _extract_tensor(obj: Any) -> Any:
821+
"""Recursively extract tensors from Input objects or pass through tensors."""
822+
if isinstance(obj, Input):
823+
if (
824+
obj.shape_mode == Input._ShapeMode.DYNAMIC
825+
and dynamic_shapes is None
826+
):
827+
logger.warning(
828+
"Mixed torch.Tensor and torch_tensorrt.Input objects provided in the example arguments without explicit dynamic_shapes. "
829+
"We cannot infer the dynamic shape specs from these mixed cases "
830+
"Consider providing explicit dynamic_shapes parameter or using Input objects for all inputs."
831+
)
832+
return obj.example_tensor()
833+
elif isinstance(obj, torch.Tensor):
834+
return obj
835+
elif isinstance(obj, (list, tuple)):
836+
extracted = [_extract_tensor(item) for item in obj]
837+
return type(obj)(extracted)
838+
elif isinstance(obj, dict):
839+
return {key: _extract_tensor(value) for key, value in obj.items()}
840+
else:
841+
raise TypeError(
842+
f"Unsupported input type: {type(obj)}. Expected torch.Tensor or torch_tensorrt.Input"
843+
)
844+
845+
arg_tensors = _extract_tensor(arg_inputs) if arg_inputs is not None else ()
846+
kwarg_tensors = (
847+
_extract_tensor(kwarg_inputs) if kwarg_inputs is not None else {}
848+
)
849+
850+
# Extract tensors from Input objects for actual execution
851+
# When inferring dynamic shapes, use different sizes for args vs kwargs to avoid
852+
# torch.export detecting spurious equality constraints
853+
754854
if output_format not in accepted_formats:
755855
raise ValueError(
756856
f"Provided output_format {output_format} is not supported. Supported options are exported_program | torchscript"
@@ -776,7 +876,13 @@ def save(
776876
logger.warning(
777877
"Provided model is a torch.jit.ScriptModule, inputs or arg_inputs is not necessary during save."
778878
)
779-
torch.jit.save(module, file_path)
879+
function_overload_with_kwargs(
880+
torch.jit.save,
881+
module,
882+
file_path,
883+
_extra_files=extra_files,
884+
**kwargs,
885+
)
780886
elif module_type == _ModuleType.ep:
781887
if output_format == "torchscript":
782888
raise ValueError(
@@ -788,7 +894,14 @@ def save(
788894
"Provided model is a torch.export.ExportedProgram, inputs or arg_inputs is not necessary during save, it uses the inputs or arg_inputs provided during export and compile"
789895
)
790896
if output_format == "exported_program":
791-
torch.export.save(module, file_path, pickle_protocol=pickle_protocol)
897+
function_overload_with_kwargs(
898+
torch.export.save,
899+
module,
900+
file_path,
901+
pickle_protocol=pickle_protocol,
902+
extra_files=extra_files,
903+
**kwargs,
904+
)
792905
elif output_format == "aot_inductor":
793906
inductor_configs = {}
794907
if "inductor_configs" in kwargs:
@@ -809,7 +922,13 @@ def save(
809922
module_ts = torch.jit.trace(
810923
module, arg_inputs, example_kwarg_inputs=kwarg_inputs
811924
)
812-
torch.jit.save(module_ts, file_path)
925+
function_overload_with_kwargs(
926+
torch.jit.save,
927+
module_ts,
928+
file_path,
929+
_extra_files=extra_files,
930+
**kwargs,
931+
)
813932
else:
814933
if not retrace:
815934
from torch_tensorrt.dynamo._exporter import export
@@ -818,10 +937,27 @@ def save(
818937
logger.warning(
819938
"Provided model is a torch.fx.GraphModule and retrace is False, inputs or arg_inputs is not necessary during save."
820939
)
821-
exp_program = export(module)
940+
941+
# Default for retrace=False is the legacy exporter (pure graph surgery,
942+
# no re-execution). Override with use_legacy_exporter if provided.
943+
_use_legacy = (
944+
use_legacy_exporter if use_legacy_exporter is not None else True
945+
)
946+
exp_program = export(
947+
module,
948+
arg_inputs=arg_tensors,
949+
kwarg_inputs=kwarg_tensors,
950+
dynamic_shapes=dynamic_shapes,
951+
use_legacy_exporter=_use_legacy,
952+
)
822953
if output_format == "exported_program":
823-
torch.export.save(
824-
exp_program, file_path, pickle_protocol=pickle_protocol
954+
function_overload_with_kwargs(
955+
torch.export.save,
956+
exp_program,
957+
file_path,
958+
pickle_protocol=pickle_protocol,
959+
extra_files=extra_files,
960+
**kwargs,
825961
)
826962
elif output_format == "aot_inductor":
827963
inductor_configs = {}
@@ -838,20 +974,69 @@ def save(
838974
"Attempted to serialize an exported program with an unsupported format. Exported programs support exported_program and aot_inductor"
839975
)
840976
else:
841-
if arg_inputs is None:
842-
raise ValueError(
843-
"Provided model is a torch.fx.GraphModule and retrace is True, however the inputs or arg_inputs are empty. Please provide valid torch.tensors as inputs or arg_inputs to trace and save the model"
844-
)
845-
exp_program = torch.export.export(
846-
module,
847-
tuple(arg_inputs),
848-
kwargs=kwarg_inputs,
849-
strict=False,
977+
# When retrace=True with a TRT-compiled GraphModule that has dynamic shapes,
978+
# use torch.export.export on the inlined graph to get a fully
979+
# standards-compliant ExportedProgram. Override with use_legacy_exporter
980+
# if provided.
981+
has_symbolic_metadata = any(
982+
isinstance(dim, torch.SymInt)
983+
for node in module.graph.nodes
984+
if node.op == "placeholder" and "val" in node.meta
985+
for dim in getattr(node.meta["val"], "shape", [])
850986
)
987+
if has_symbolic_metadata and dynamic_shapes is not None:
988+
from torch_tensorrt.dynamo._exporter import export
989+
990+
if arg_inputs is not None:
991+
logger.info(
992+
"Provided model is a torch.fx.GraphModule with dynamic shapes and retrace is True. "
993+
"Using existing symbolic metadata instead of retracing. Input specs are not necessary."
994+
)
995+
# Default for this path is the non-legacy exporter.
996+
_use_legacy = (
997+
use_legacy_exporter
998+
if use_legacy_exporter is not None
999+
else False
1000+
)
1001+
exp_program = export(
1002+
module,
1003+
arg_inputs=arg_tensors,
1004+
kwarg_inputs=kwarg_tensors,
1005+
dynamic_shapes=dynamic_shapes,
1006+
use_legacy_exporter=_use_legacy,
1007+
)
1008+
else:
1009+
# Regular GraphModule or no dynamic shapes - retrace normally
1010+
if has_symbolic_metadata:
1011+
logger.warning(
1012+
"The provided module has symbolic metadata and retrace is True, however there is no dynamic shapes information available either explicitly or derived from arg/kwarg inputs (torch_tensorrt.Input) "
1013+
"This may lead to incorrect tracing and overly restrictive shape guards when the exported program is loaded. Please specify the dynamic shapes either explicitly or derived from arg/kwarg inputs"
1014+
)
1015+
1016+
if (arg_inputs is None or arg_inputs == ()) and (
1017+
kwarg_tensors is None or kwarg_tensors == {}
1018+
):
1019+
raise ValueError(
1020+
"Provided model is a torch.fx.GraphModule without existing shape metadata and retrace is True, however no inputs specs were provided. "
1021+
"Please provide valid torch.Tensors or torch_tensorrt.Input objects as inputs to retrace and save the model"
1022+
)
1023+
1024+
exp_program = torch.export.export(
1025+
module,
1026+
args=tuple(arg_tensors),
1027+
kwargs=kwarg_tensors,
1028+
dynamic_shapes=dynamic_shapes,
1029+
strict=False,
1030+
)
8511031

8521032
if output_format == "exported_program":
853-
torch.export.save(
854-
exp_program, file_path, pickle_protocol=pickle_protocol
1033+
function_overload_with_kwargs(
1034+
torch.export.save,
1035+
exp_program,
1036+
file_path,
1037+
pickle_protocol=pickle_protocol,
1038+
extra_files=extra_files,
1039+
**kwargs,
8551040
)
8561041
elif output_format == "aot_inductor":
8571042
inductor_configs = {}
@@ -867,3 +1052,19 @@ def save(
8671052
raise RuntimeError(
8681053
"Attempted to serialize an exported program with an unsupported format. Exported programs support exported_program and aot_inductor"
8691054
)
1055+
1056+
1057+
def function_overload_with_kwargs(
1058+
fn: Callable[..., Any], *args: Any, **kwargs: Any
1059+
) -> Any:
1060+
fn_signature = inspect.signature(fn).parameters
1061+
fn_kwargs = {}
1062+
for k, v in kwargs.items():
1063+
if k in fn_signature:
1064+
fn_kwargs[k] = v
1065+
else:
1066+
logger.warning(
1067+
f"Keyword argument {k} is not a valid argument for {fn.__name__}"
1068+
)
1069+
1070+
return fn(*args, **fn_kwargs)

0 commit comments

Comments
 (0)