From 5aa61380e442a6d70065bba1748e62cb42520250 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Wed, 21 Jan 2026 20:28:21 -0800
Subject: [PATCH 01/13] Build against a local TensorRT install and force CI build flags

---
 MODULE.bazel                     | 26 +++++++++++++-------------
 setup.py                         |  4 ++--
 third_party/tensorrt/local/BUILD | 14 +++++++-------
 3 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/MODULE.bazel b/MODULE.bazel
index 625ddbea6b..4f7af984a9 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -98,14 +98,14 @@ http_archive(
 # Either place them in the distdir directory in third_party and use the --distdir flag
 # or modify the urls to "file:///<PATH TO TARBALL>/<TARBALL NAME>.tar.gz
 
-http_archive(
-    name = "tensorrt",
-    build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.14.1.48",
-    urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.14.1/tars/TensorRT-10.14.1.48.Linux.x86_64-gnu.cuda-13.0.tar.gz",
-    ],
-)
+# http_archive(
+#    name = "tensorrt",
+#    build_file = "@//third_party/tensorrt/archive:BUILD",
+#    strip_prefix = "TensorRT-10.14.1.48",
+#    urls = [
+#        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.14.1/tars/TensorRT-10.14.1.48.Linux.x86_64-gnu.cuda-13.0.tar.gz",
+#    ],
+#)
 
 http_archive(
     name = "tensorrt_rtx",
@@ -164,8 +164,8 @@ http_archive(
 #    build_file = "third_party/libtorch/BUILD"
 #)
 
-#new_local_repository(
-#   name = "tensorrt",
-#   path = "/usr/",
-#   build_file = "@//third_party/tensorrt/local:BUILD"
-#)
+new_local_repository(
+   name = "tensorrt",
+   path = "/home/lanl/Downloads/cmake_build/packages/TensorRT-10.16.0.35",
+   build_file = "@//third_party/tensorrt/local:BUILD"
+)
diff --git a/setup.py b/setup.py
index 8e9e0231c9..085a31b7c7 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@
 
 LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$")
 # CI_PIPELINE_ID is the environment variable set by DLFW ci build
-IS_DLFW_CI = os.environ.get("CI_PIPELINE_ID") is not None
+IS_DLFW_CI = True # os.environ.get("CI_PIPELINE_ID") is not None
 
 
 def get_root_dir() -> Path:
@@ -95,7 +95,7 @@ def load_dep_info():
 NO_TS = False
 LEGACY = False
 RELEASE = False
-CI_BUILD = False
+CI_BUILD = True #False
 USE_TRT_RTX = False
 
 if "--use-rtx" in sys.argv:
diff --git a/third_party/tensorrt/local/BUILD b/third_party/tensorrt/local/BUILD
index b28ef63e7c..1d860b2c25 100644
--- a/third_party/tensorrt/local/BUILD
+++ b/third_party/tensorrt/local/BUILD
@@ -83,7 +83,7 @@ cc_import(
     name = "nvinfer_static_lib",
     static_library = select({
         ":aarch64_linux": "lib/aarch64-linux-gnu/libnvinfer_static.a",
-        ":ci_rhel_x86_64_linux": "lib64/libnvinfer_static.a",
+        ":ci_rhel_x86_64_linux": "lib/libnvinfer_static.a",
         ":windows": "lib/nvinfer_10.lib",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvinfer_static.a",
     }),
@@ -94,7 +94,7 @@ cc_import(
     name = "nvinfer_lib",
     shared_library = select({
         ":aarch64_linux": "lib/aarch64-linux-gnu/libnvinfer.so",
-        ":ci_rhel_x86_64_linux": "lib64/libnvinfer.so",
+        ":ci_rhel_x86_64_linux": "lib/libnvinfer.so",
         ":windows": "bin/nvinfer_10.dll",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvinfer.so",
     }),
@@ -122,7 +122,7 @@ cc_import(
     name = "nvparsers_lib",
     shared_library = select({
         ":aarch64_linux": "lib/aarch64-linux-gnu/libnvparsers.so",
-        ":ci_rhel_x86_64_linux": "lib64/libnvparsers.so",
+        ":ci_rhel_x86_64_linux": "lib/libnvparsers.so",
         ":windows": "lib/nvparsers.dll",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvparsers.so",
     }),
@@ -186,7 +186,7 @@ cc_import(
     name = "nvonnxparser_lib",
     shared_library = select({
         ":aarch64_linux": "lib/aarch64-linux-gnu/libnvonnxparser.so",
-        ":ci_rhel_x86_64_linux": "lib64/libnvonnxparser.so",
+        ":ci_rhel_x86_64_linux": "lib/libnvonnxparser.so",
         ":windows": "lib/nvonnxparser.dll",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvonnxparser.so",
     }),
@@ -242,7 +242,7 @@ cc_import(
     name = "nvonnxparser_runtime_lib",
     shared_library = select({
         ":aarch64_linux": "lib/x86_64-linux-gnu/libnvonnxparser_runtime.so",
-        ":ci_rhel_x86_64_linux": "lib64/libnvonnxparser_runtime.so",
+        ":ci_rhel_x86_64_linux": "lib/libnvonnxparser_runtime.so",
         ":windows": "lib/nvonnxparser_runtime.dll",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvonnxparser_runtime.so",
     }),
@@ -290,7 +290,7 @@ cc_import(
     name = "nvcaffeparser_lib",
     shared_library = select({
         ":aarch64_linux": "lib/aarch64-linux-gnu/libnvcaffe_parsers.so",
-        ":ci_rhel_x86_64_linux": "lib64/libnvcaffe_parsers.so",
+        ":ci_rhel_x86_64_linux": "lib/libnvcaffe_parsers.so",
         ":windows": "lib/nvcaffe_parsers.dll",
         "//conditions:default": "lib/x86_64-linux-gnu/libnvcaffe_parsers.so",
     }),
@@ -338,7 +338,7 @@ cc_library(
     name = "nvinferplugin",
     srcs = select({
         ":aarch64_linux": ["lib/aarch64-linux-gnu/libnvinfer_plugin.so"],
-        ":ci_rhel_x86_64_linux": ["lib64/libnvinfer_plugin.so"],
+        ":ci_rhel_x86_64_linux": ["lib/libnvinfer_plugin.so"],
         ":windows": ["lib/nvinfer_plugin_10.lib"],
         "//conditions:default": ["lib/x86_64-linux-gnu/libnvinfer_plugin.so"],
     }),

From 456668e3fac2767d76520657b153bb8a20bf4b2d Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Sun, 22 Feb 2026 12:28:48 -0800
Subject: [PATCH 02/13] Add capture_tensorrt_api_recording_json_file option to Debugger

---
 MODULE.bazel                                  | 20 ++++----
 py/torch_tensorrt/_TensorRTProxyModule.py     | 23 ++-------
 py/torch_tensorrt/dynamo/debug/_Debugger.py   | 51 ++++++++++++++++++-
 .../dynamo/debug/_DebuggerConfig.py           |  1 +
 4 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/MODULE.bazel b/MODULE.bazel
index 07cf95b0cd..e0e44020d0 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -1,6 +1,6 @@
 module(
     name = "torch_tensorrt",
-    version = "2.12.0a0",
+    version = "2.11.0a0",
     repo_name = "org_pytorch_tensorrt",
 )
 
@@ -98,14 +98,14 @@ http_archive(
 # Either place them in the distdir directory in third_party and use the --distdir flag
 # or modify the urls to "file:///<PATH TO TARBALL>/<TARBALL NAME>.tar.gz
 
-http_archive(
-    name = "tensorrt",
-    build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.15.1.29",
-    urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.15.1/tars/TensorRT-10.15.1.29.Linux.x86_64-gnu.cuda-13.1.tar.gz",
-    ],
-)
+#http_archive(
+#    name = "tensorrt",
+#    build_file = "@//third_party/tensorrt/archive:BUILD",
+#    strip_prefix = "TensorRT-10.15.1.29",
+#    urls = [
+#        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.15.1/tars/TensorRT-10.15.1.29.Linux.x86_64-gnu.cuda-13.1.tar.gz",
+#    ],
+#)
 
 http_archive(
     name = "tensorrt_rtx",
@@ -167,5 +167,5 @@ http_archive(
 new_local_repository(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/local:BUILD",
-    path = "/home/lanl/Downloads/cmake_build/packages/TensorRT-10.16.0.35",
+    path = "/home/lanl/Downloads/cmake_build/packages/TensorRT-10.16.0.46",
 )
diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py
index cc88953c01..75751a70d1 100644
--- a/py/torch_tensorrt/_TensorRTProxyModule.py
+++ b/py/torch_tensorrt/_TensorRTProxyModule.py
@@ -1,12 +1,11 @@
 import ctypes
 import importlib
-import importlib.util
 import importlib.metadata
+import importlib.util
 import logging
 import os
 import platform
 import sys
-import tempfile
 from types import ModuleType
 from typing import Any, Dict, List
 
@@ -54,6 +53,7 @@ def enable_capture_tensorrt_api_recording() -> None:
     elif platform.uname().processor == "aarch64":
         linux_lib_path.append("/usr/lib/aarch64-linux-gnu")
 
+    tensorrt_lib_path = None
     for path in linux_lib_path:
         if os.path.isfile(os.path.join(path, "libtensorrt_shim.so")):
             try:
@@ -74,24 +74,7 @@ def enable_capture_tensorrt_api_recording() -> None:
         os.environ["TRT_SHIM_NVINFER_LIB_NAME"] = os.path.join(
             tensorrt_lib_path, "libnvinfer.so"
         )
-        import pwd
-
-        current_user = pwd.getpwuid(os.getuid())[0]
-        shim_temp_dir = os.path.join(
-            tempfile.gettempdir(), f"torch_tensorrt_{current_user}/shim"
-        )
-        os.makedirs(shim_temp_dir, exist_ok=True)
-        json_file_name = os.path.join(shim_temp_dir, "shim.json")
-        os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = json_file_name
-        bin_file_name = os.path.join(shim_temp_dir, "shim.bin")
-        # if exists, delete the file, so that we can capture the new one
-        if os.path.exists(json_file_name):
-            os.remove(json_file_name)
-        if os.path.exists(bin_file_name):
-            os.remove(bin_file_name)
-        _LOGGER.info(
-            f"Capturing TensorRT API calls feature is enabled and the captured output is in the {shim_temp_dir} directory"
-        )
+        _LOGGER.info("Capturing TensorRT API calls feature is enabled")
 
 
 # TensorRTProxyModule is a proxy module that allows us to register the tensorrt or tensorrt-rtx package
diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py
index e565929861..4bbf1dff5b 100644
--- a/py/torch_tensorrt/dynamo/debug/_Debugger.py
+++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py
@@ -1,4 +1,5 @@
 import contextlib
+import ctypes
 import functools
 import logging
 import os
@@ -34,6 +35,7 @@ def __init__(
         capture_fx_graph_after: Optional[List[str]] = None,
         save_engine_profile: bool = False,
         capture_tensorrt_api_recording: bool = False,
+        capture_tensorrt_api_recording_json_file: str = "",
         profile_format: str = "perfetto",
         engine_builder_monitor: bool = True,
         logging_dir: str = DEBUG_LOGGING_DIR,
@@ -54,6 +56,8 @@ def __init__(
             capture_tensorrt_api_recording (bool): Whether to enable the capture TensorRT API recording feature, when this is enabled, it will output the catputure TensorRT API recording in the /tmp/torch_tensorrt_{current_user}/shim directory.
                 It is part of the TensorRT capture and replay feature, the captured output will be able to replay for debug purpose.
                 Defaults to False.
+            capture_tensorrt_api_recording_json_file (str, optional): the JSON file to save the captured TensorRT API recording.
+                If not set, the captured TensorRT API recording will be saved in the current working directory.
             profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'.
                 If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime.
                 If you need to generate cudagraph visualization, set it to 'cudagraph'.
@@ -71,6 +75,7 @@ def __init__(
             log_level=log_level,
             save_engine_profile=save_engine_profile,
             capture_tensorrt_api_recording=capture_tensorrt_api_recording,
+            capture_tensorrt_api_recording_json_file=capture_tensorrt_api_recording_json_file,
             engine_builder_monitor=engine_builder_monitor,
             logging_dir=logging_dir,
             profile_format=profile_format,
@@ -103,17 +108,21 @@ def __init__(
                 _LOGGER.warning(
                     f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}"
                 )
+                self.cfg.capture_tensorrt_api_recording = False
             elif ENABLED_FEATURES.tensorrt_rtx:
                 _LOGGER.warning(
                     "Capturing TensorRT API calls is not supported for TensorRT-RTX, therefore ignoring the capture_tensorrt_api_recording setting"
                 )
+                self.cfg.capture_tensorrt_api_recording = False
             else:
                 env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None)
                 if env_flag is None or (env_flag != "1" and env_flag.lower() != "true"):
                     _LOGGER.warning(
                         "In order to capture TensorRT API calls, please invoke the script with environment variable TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1"
                     )
-                _LOGGER.info("Capturing TensorRT API calls feature is enabled")
+                    self.cfg.capture_tensorrt_api_recording = False
+                else:
+                    _LOGGER.info("Capturing TensorRT API calls feature is enabled")
 
     def __enter__(self) -> None:
         self.original_lvl = _LOGGER.getEffectiveLevel()
@@ -166,6 +175,8 @@ def __enter__(self) -> None:
             for c in _DEBUG_ENABLED_CLS
         ]
 
+        self.set_capture_tensorrt_api_recording_json_file()
+
     def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
 
         dictConfig(self.get_logging_config(None))
@@ -224,3 +235,41 @@ def get_logging_config(self, log_level: Optional[int] = None) -> dict[str, Any]:
             }
             config["loggers"][""]["handlers"].append("file")
         return config
+
+    def set_capture_tensorrt_api_recording_json_file(self) -> None:
+        if self.cfg.capture_tensorrt_api_recording is False:
+            return
+        if self.cfg.capture_tensorrt_api_recording_json_file == "":
+            return
+
+        if os.path.isdir(self.cfg.capture_tensorrt_api_recording_json_file):
+            self.cfg.capture_tensorrt_api_recording_json_file = os.path.join(
+                self.cfg.capture_tensorrt_api_recording_json_file, "capture.json"
+            )
+
+        if os.path.isfile(self.cfg.capture_tensorrt_api_recording_json_file):
+            os.remove(self.cfg.capture_tensorrt_api_recording_json_file)
+
+        nvinfer_lib = os.environ.get("TRT_SHIM_NVINFER_LIB_NAME", None)
+        if nvinfer_lib is None:
+            _LOGGER.warning(
+                "TRT_SHIM_NVINFER_LIB_NAME is not set, therefore capturing TensorRT API recording is not supported"
+            )
+            return
+        lib_path = os.path.dirname(nvinfer_lib)
+        shim_path = os.path.join(lib_path, "libtensorrt_shim.so")
+        if not os.path.isfile(shim_path):
+            _LOGGER.warning(
+                f"libtensorrt_shim.so is not found in the {lib_path} directory, therefore capturing TensorRT API recording is not supported"
+            )
+            return
+        try:
+            shim_lib = ctypes.CDLL(shim_path, mode=ctypes.RTLD_GLOBAL)
+            shim_lib.trtShimSetOutputJsonFile(
+                self.cfg.capture_tensorrt_api_recording_json_file.encode("utf-8")
+            )
+        except Exception as e:
+            _LOGGER.warning(
+                f"Failed to set the output JSON file for TensorRT API recording: {e}"
+            )
+            return
diff --git a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
index 82cd3ba83a..4216b09371 100644
--- a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
+++ b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
@@ -8,6 +8,7 @@ class DebuggerConfig:
     log_level: str = "debug"
     save_engine_profile: bool = False
     capture_tensorrt_api_recording: bool = False
+    capture_tensorrt_api_recording_json_file: str = ""
     engine_builder_monitor: bool = True
     logging_dir: str = DEBUG_LOGGING_DIR
     profile_format: str = "perfetto"

From 6ede9a03b57e5f56e3d14134ff4ef328dcac52f0 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Thu, 2 Apr 2026 13:50:48 -0700
Subject: [PATCH 03/13] setup.py: restore IS_DLFW_CI detection from CI_PIPELINE_ID

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c05079ec52..6dddefa38e 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
 
 LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$")
 # CI_PIPELINE_ID is the environment variable set by DLFW ci build
-IS_DLFW_CI = True  # os.environ.get("CI_PIPELINE_ID") is not None
+IS_DLFW_CI = os.environ.get("CI_PIPELINE_ID") is not None
 
 
 def get_root_dir() -> Path:

From e3fb1bd36a58fb1d5400ffb439de49703d558023 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Thu, 2 Apr 2026 13:51:32 -0700
Subject: [PATCH 04/13] setup.py: restore CI_BUILD default to False

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6dddefa38e..8b377fc651 100644
--- a/setup.py
+++ b/setup.py
@@ -96,7 +96,7 @@ def load_dep_info():
 NO_TS = False
 LEGACY = False
 RELEASE = False
-CI_BUILD = True  # False
+CI_BUILD = False
 USE_TRT_RTX = False
 
 if "--use-rtx" in sys.argv:

From 73541c6126ae7b83dee46d99ecaa78c0e944b81c Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Thu, 2 Apr 2026 13:57:09 -0700
Subject: [PATCH 05/13] add doc change

---
 docsrc/debugging/capture_and_replay.rst | 47 +++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/docsrc/debugging/capture_and_replay.rst b/docsrc/debugging/capture_and_replay.rst
index cbd7295502..1df1f2ead3 100644
--- a/docsrc/debugging/capture_and_replay.rst
+++ b/docsrc/debugging/capture_and_replay.rst
@@ -13,6 +13,53 @@ Prerequisites
 Quick start: Capture
 --------------------
 
+Example ``test.py``:
+
+.. code-block:: python
+
+    import torch
+    import torch_tensorrt as torchtrt
+    import torchvision.models as models
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv1d(3, 3, 3, padding=1, stride=1, bias=True)
+
+        def forward(self, x):
+            return self.conv(x)
+
+    model = MyModule().eval().to("cuda")
+    input = torch.randn((1, 3, 3)).to("cuda").to(torch.float32)
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                min_shape=(1, 3, 3),
+                opt_shape=(2, 3, 3),
+                max_shape=(3, 3, 3),
+                dtype=torch.float32,
+            )
+        ],
+        "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
+        "use_python_runtime": True,
+    }
+
+    try:
+        with torchtrt.dynamo.Debugger(
+            "graphs",
+            logging_dir="debuglogs",
+            capture_tensorrt_api_recording=True,
+            capture_tensorrt_api_recording_json_file="/tmp/capturelanlan.json",
+        ):
+            trt_mod = torchtrt.compile(model, **compile_spec)
+
+    except Exception as e:
+        raise e
+
+    print("done.....")
+
 .. code-block:: bash
 
     TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1 python test.py

From 0678dce16a0ba23442b27c49b9ed3bba67449ab3 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Fri, 3 Apr 2026 09:42:49 -0700
Subject: [PATCH 06/13] Enable TensorRT API capture via env var only; save output under logging_dir/capture_replay

---
 .gitignore                                    |  1 +
 docsrc/debugging/capture_and_replay.rst       |  6 +-
 py/torch_tensorrt/dynamo/debug/_Debugger.py   | 71 ++++++++-----------
 .../dynamo/debug/_DebuggerConfig.py           |  1 -
 4 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/.gitignore b/.gitignore
index f08d97d448..25a7acad99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,4 @@ coverage.xml
 *.log
 *.pt2
 examples/torchtrt_aoti_example/torchtrt_aoti_example
+CLAUDE.md
diff --git a/docsrc/debugging/capture_and_replay.rst b/docsrc/debugging/capture_and_replay.rst
index 1df1f2ead3..c80a8c509e 100644
--- a/docsrc/debugging/capture_and_replay.rst
+++ b/docsrc/debugging/capture_and_replay.rst
@@ -50,8 +50,6 @@ Example ``test.py``:
         with torchtrt.dynamo.Debugger(
             "graphs",
             logging_dir="debuglogs",
-            capture_tensorrt_api_recording=True,
-            capture_tensorrt_api_recording_json_file="/tmp/capturelanlan.json",
         ):
             trt_mod = torchtrt.compile(model, **compile_spec)
 
@@ -64,7 +62,7 @@ Example ``test.py``:
 
     TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1 python test.py
 
-You should see ``shim.json`` and ``shim.bin`` generated in ``/tmp/torch_tensorrt_{current_user}/shim``.
+When ``TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1`` is set, capture and replay files are automatically saved under ``debuglogs/capture_replay/`` (i.e., the ``capture_replay`` subdirectory of ``logging_dir``). You should see ``capture.json`` and associated ``.bin`` files generated there.
 
 Replay: Build the engine from the capture
 -----------------------------------------
@@ -73,7 +71,7 @@ Use ``tensorrt_player`` to replay the captured build without the original framew
 
 .. code-block:: bash
 
-    tensorrt_player -j /absolute/path/to/shim.json -o /absolute/path/to/output_engine
+    tensorrt_player -j debuglogs/capture_replay/capture.json -o /absolute/path/to/output_engine
 
 This produces a serialized TensorRT engine at ``output_engine``.
 
diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py
index 4bbf1dff5b..7b645c04a4 100644
--- a/py/torch_tensorrt/dynamo/debug/_Debugger.py
+++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py
@@ -34,8 +34,6 @@ def __init__(
         capture_fx_graph_before: Optional[List[str]] = None,
         capture_fx_graph_after: Optional[List[str]] = None,
         save_engine_profile: bool = False,
-        capture_tensorrt_api_recording: bool = False,
-        capture_tensorrt_api_recording_json_file: str = "",
         profile_format: str = "perfetto",
         engine_builder_monitor: bool = True,
         logging_dir: str = DEBUG_LOGGING_DIR,
@@ -53,11 +51,6 @@ def __init__(
                 after execution of a lowering pass. Defaults to None.
             save_engine_profile (bool): Whether to save TensorRT engine profiling information.
                 Defaults to False.
-            capture_tensorrt_api_recording (bool): Whether to enable the capture TensorRT API recording feature, when this is enabled, it will output the catputure TensorRT API recording in the /tmp/torch_tensorrt_{current_user}/shim directory.
-                It is part of the TensorRT capture and replay feature, the captured output will be able to replay for debug purpose.
-                Defaults to False.
-            capture_tensorrt_api_recording_json_file (str, optional): the JSON file to save the captured TensorRT API recording.
-                If not set, the captured TensorRT API recording will be saved in the current working directory.
             profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'.
                 If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime.
                 If you need to generate cudagraph visualization, set it to 'cudagraph'.
@@ -71,11 +64,35 @@ def __init__(
         """
 
         os.makedirs(logging_dir, exist_ok=True)
+
+        # Auto-detect TensorRT API capture from environment variable
+        env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None)
+        capture_tensorrt_api_recording = env_flag is not None and (
+            env_flag == "1" or env_flag.lower() == "true"
+        )
+
+        if capture_tensorrt_api_recording:
+            if not sys.platform.startswith("linux"):
+                _LOGGER.warning(
+                    f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring TORCHTRT_ENABLE_TENSORRT_API_CAPTURE for {sys.platform}"
+                )
+                capture_tensorrt_api_recording = False
+            elif ENABLED_FEATURES.tensorrt_rtx:
+                _LOGGER.warning(
+                    "Capturing TensorRT API calls is not supported for TensorRT-RTX, therefore ignoring TORCHTRT_ENABLE_TENSORRT_API_CAPTURE"
+                )
+                capture_tensorrt_api_recording = False
+            else:
+                _LOGGER.info("Capturing TensorRT API calls feature is enabled")
+
+        if capture_tensorrt_api_recording:
+            capture_replay_dir = os.path.join(logging_dir, "capture_replay")
+            os.makedirs(capture_replay_dir, exist_ok=True)
+
         self.cfg = DebuggerConfig(
             log_level=log_level,
             save_engine_profile=save_engine_profile,
             capture_tensorrt_api_recording=capture_tensorrt_api_recording,
-            capture_tensorrt_api_recording_json_file=capture_tensorrt_api_recording_json_file,
             engine_builder_monitor=engine_builder_monitor,
             logging_dir=logging_dir,
             profile_format=profile_format,
@@ -103,27 +120,6 @@ def __init__(
         self.capture_fx_graph_before = capture_fx_graph_before
         self.capture_fx_graph_after = capture_fx_graph_after
 
-        if self.cfg.capture_tensorrt_api_recording:
-            if not sys.platform.startswith("linux"):
-                _LOGGER.warning(
-                    f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}"
-                )
-                self.cfg.capture_tensorrt_api_recording = False
-            elif ENABLED_FEATURES.tensorrt_rtx:
-                _LOGGER.warning(
-                    "Capturing TensorRT API calls is not supported for TensorRT-RTX, therefore ignoring the capture_tensorrt_api_recording setting"
-                )
-                self.cfg.capture_tensorrt_api_recording = False
-            else:
-                env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None)
-                if env_flag is None or (env_flag != "1" and env_flag.lower() != "true"):
-                    _LOGGER.warning(
-                        "In order to capture TensorRT API calls, please invoke the script with environment variable TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1"
-                    )
-                    self.cfg.capture_tensorrt_api_recording = False
-                else:
-                    _LOGGER.info("Capturing TensorRT API calls feature is enabled")
-
     def __enter__(self) -> None:
         self.original_lvl = _LOGGER.getEffectiveLevel()
         if ENABLED_FEATURES.torch_tensorrt_runtime:
@@ -239,16 +235,12 @@ def get_logging_config(self, log_level: Optional[int] = None) -> dict[str, Any]:
     def set_capture_tensorrt_api_recording_json_file(self) -> None:
         if self.cfg.capture_tensorrt_api_recording is False:
             return
-        if self.cfg.capture_tensorrt_api_recording_json_file == "":
-            return
 
-        if os.path.isdir(self.cfg.capture_tensorrt_api_recording_json_file):
-            self.cfg.capture_tensorrt_api_recording_json_file = os.path.join(
-                self.cfg.capture_tensorrt_api_recording_json_file, "capture.json"
-            )
+        capture_replay_dir = os.path.join(self.cfg.logging_dir, "capture_replay")
+        json_file = os.path.join(capture_replay_dir, "capture.json")
 
-        if os.path.isfile(self.cfg.capture_tensorrt_api_recording_json_file):
-            os.remove(self.cfg.capture_tensorrt_api_recording_json_file)
+        if os.path.isfile(json_file):
+            os.remove(json_file)
 
         nvinfer_lib = os.environ.get("TRT_SHIM_NVINFER_LIB_NAME", None)
         if nvinfer_lib is None:
@@ -265,9 +257,8 @@ def set_capture_tensorrt_api_recording_json_file(self) -> None:
             return
         try:
             shim_lib = ctypes.CDLL(shim_path, mode=ctypes.RTLD_GLOBAL)
-            shim_lib.trtShimSetOutputJsonFile(
-                self.cfg.capture_tensorrt_api_recording_json_file.encode("utf-8")
-            )
+            shim_lib.trtShimSetOutputJsonFile(json_file.encode("utf-8"))
+            _LOGGER.info(f"TensorRT API recording will be saved to {json_file}")
         except Exception as e:
             _LOGGER.warning(
                 f"Failed to set the output JSON file for TensorRT API recording: {e}"
diff --git a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
index 4216b09371..82cd3ba83a 100644
--- a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
+++ b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py
@@ -8,7 +8,6 @@ class DebuggerConfig:
     log_level: str = "debug"
     save_engine_profile: bool = False
     capture_tensorrt_api_recording: bool = False
-    capture_tensorrt_api_recording_json_file: str = ""
     engine_builder_monitor: bool = True
     logging_dir: str = DEBUG_LOGGING_DIR
     profile_format: str = "perfetto"

From da634da0e40156a960ee32a8d3657423c102cf42 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Fri, 3 Apr 2026 11:59:43 -0700
Subject: [PATCH 07/13] libtorch BUILD: allow empty glob for torch/csrc/api/include headers

---
 third_party/libtorch/BUILD | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/third_party/libtorch/BUILD b/third_party/libtorch/BUILD
index 37309f7209..497b953178 100644
--- a/third_party/libtorch/BUILD
+++ b/third_party/libtorch/BUILD
@@ -39,9 +39,12 @@ cc_library(
         exclude = [
             "include/torch/csrc/api/include/**/*.h",
         ],
-    ) + glob([
-        "include/torch/csrc/api/include/**/*.h",
-    ]),
+    ) + glob(
+        [
+            "include/torch/csrc/api/include/**/*.h",
+        ],
+        allow_empty = True,
+    ),
     includes = [
         "include",
         "include/torch/csrc/api/include/",

From 92ac72f7be86ec5622994a1da8b38ea94fec0d40 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Fri, 3 Apr 2026 12:13:45 -0700
Subject: [PATCH 08/13] libtorch BUILD: allow empty c10 header glob for the libc10_cuda library

---
 third_party/libtorch/BUILD | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/third_party/libtorch/BUILD b/third_party/libtorch/BUILD
index 497b953178..8a32f94ae4 100644
--- a/third_party/libtorch/BUILD
+++ b/third_party/libtorch/BUILD
@@ -61,9 +61,12 @@ cc_library(
         ":windows": ["lib/c10_cuda.lib"],
         "//conditions:default": ["lib/libc10_cuda.so"],
     }),
-    hdrs = glob([
-        "include/c10/**/*.h",
-    ]),
+    hdrs = glob(
+        [
+            "include/c10/**/*.h",
+        ],
+        allow_empty = True,
+    ),
     strip_include_prefix = "include",
     deps = [
         ":c10",

From 98f0d209f9786d2e07a7476065b9a31d27358900 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Tue, 7 Apr 2026 14:46:09 -0700
Subject: [PATCH 09/13] libtorch BUILD: allow empty c10 header glob for the libc10 library

---
 third_party/libtorch/BUILD | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/third_party/libtorch/BUILD b/third_party/libtorch/BUILD
index 8a32f94ae4..3297ba04dd 100644
--- a/third_party/libtorch/BUILD
+++ b/third_party/libtorch/BUILD
@@ -79,9 +79,12 @@ cc_library(
         ":windows": ["lib/c10.lib"],
         "//conditions:default": ["lib/libc10.so"],
     }),
-    hdrs = glob([
-        "include/c10/**/*.h",
-    ]),
+    hdrs = glob(
+        [
+            "include/c10/**/*.h",
+        ],
+        allow_empty = True,
+    ),
     strip_include_prefix = "include",
 )
 

From 9e6360f493a1be15279ceb65ae56dcb4f6f46d2f Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Tue, 7 Apr 2026 15:12:17 -0700
Subject: [PATCH 10/13] libtorch BUILD: allow empty ATen and caffe2 header globs

---
 third_party/libtorch/BUILD | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/third_party/libtorch/BUILD b/third_party/libtorch/BUILD
index 3297ba04dd..5f36debe1c 100644
--- a/third_party/libtorch/BUILD
+++ b/third_party/libtorch/BUILD
@@ -90,9 +90,12 @@ cc_library(
 
 cc_library(
     name = "ATen",
-    hdrs = glob([
-        "include/ATen/**/*.h",
-    ]),
+    hdrs = glob(
+        [
+            "include/ATen/**/*.h",
+        ],
+        allow_empty = True,
+    ),
     strip_include_prefix = "include",
 )
 
@@ -106,8 +109,11 @@ cc_library(
             "lib/libcaffe2_nvrtc.so",
         ],
     }),
-    hdrs = glob([
-        "include/caffe2/**/*.h",
-    ]),
+    hdrs = glob(
+        [
+            "include/caffe2/**/*.h",
+        ],
+        allow_empty = True,
+    ),
     strip_include_prefix = "include",
 )

From 948c6c1c8fc3c6546637a30bf0c1dd175bb1b1a2 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Tue, 7 Apr 2026 15:26:51 -0700
Subject: [PATCH 11/13] build: fail early when torch lacks c10 headers; expunge Bazel cache

---
 packaging/pre_build_script.sh | 17 +++++++++++++++++
 toolchains/local_torch.bzl    |  9 +++++++++
 2 files changed, 26 insertions(+)

diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
index 38825a1f43..12757138c9 100755
--- a/packaging/pre_build_script.sh
+++ b/packaging/pre_build_script.sh
@@ -59,6 +59,18 @@ fi
 export TORCH_BUILD_NUMBER=$(python -c "import torch, urllib.parse as ul; print(ul.quote_plus(torch.__version__))")
 export TORCH_INSTALL_PATH=$(python -c "import torch, os; print(os.path.dirname(torch.__file__))")
 
+if [[ -z "${TORCH_INSTALL_PATH}" ]]; then
+    echo "ERROR: TORCH_INSTALL_PATH is empty — could not locate torch installation."
+    echo "Ensure the active Python environment has torch installed, or set TORCH_PATH explicitly."
+    exit 1
+fi
+
+if [[ ! -d "${TORCH_INSTALL_PATH}/include/c10" ]]; then
+    echo "ERROR: torch at '${TORCH_INSTALL_PATH}' is missing include/c10/ C++ headers."
+    echo "Install a full PyTorch wheel (pip install torch) that includes dev headers."
+    exit 1
+fi
+
 # CU_UPPERBOUND eg:13.2 or 12.9
 # tensorrt tar for linux and windows are different across cuda version
 # for sbsa it is the same tar across cuda version
@@ -70,6 +82,11 @@ fi
 
 cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel
 
+# Clear any stale Bazel external-repo cache so the freshly-generated MODULE.bazel
+# is evaluated against the current third_party/libtorch/BUILD, not a cached copy
+# from a previous build or a pre-baked container image.
+bazel clean --expunge || true
+
 if [[ ${TENSORRT_VERSION} != "" ]]; then
     sed -i -e "s/strip_prefix = \"TensorRT-.*\"/strip_prefix = \"${TENSORRT_STRIP_PREFIX}\"/g" MODULE.bazel
     sed -i -e "s#\"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/.*\"#\"${TENSORRT_URLS}\"#g" MODULE.bazel
diff --git a/toolchains/local_torch.bzl b/toolchains/local_torch.bzl
index 52eb641c93..37b5291c31 100644
--- a/toolchains/local_torch.bzl
+++ b/toolchains/local_torch.bzl
@@ -76,6 +76,15 @@ def _local_torch_impl(ctx):
 
     torch_path = ctx.path(torch_dir)
 
+    # Validate that the installation has the expected C++ headers.
+    c10_include = torch_path.get_child("include").get_child("c10")
+    if not c10_include.exists:
+        fail(
+            "torch at '" + torch_dir + "' is missing include/c10/ C++ headers. " +
+            "Install a full PyTorch wheel (pip install torch) that includes dev headers, " +
+            "or set TORCH_PATH to the correct directory.",
+        )
+
     # Symlink the subdirectories the BUILD file references into the synthetic repo
     for sub in ["include", "lib", "share"]:
         child = torch_path.get_child(sub)

From 4a507bb6ba0f4e4fba7c488adfe955defb322a2c Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Tue, 7 Apr 2026 15:52:41 -0700
Subject: [PATCH 12/13] packaging: install the packaging module in pre_build_script.sh

---
 packaging/pre_build_script.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
index 12757138c9..5acd980e17 100755
--- a/packaging/pre_build_script.sh
+++ b/packaging/pre_build_script.sh
@@ -3,7 +3,7 @@
 set -x
 
 # Install dependencies
-python3 -m pip install pyyaml
+python3 -m pip install pyyaml packaging
 
 if [[ $(uname -m) == "aarch64" ]]; then
   IS_AARCH64=true

From f9d4ee33687b80249c5b9c26410f69474b97cc54 Mon Sep 17 00:00:00 2001
From: Lan Luo <lanl@nvidia.com>
Date: Tue, 7 Apr 2026 16:07:08 -0700
Subject: [PATCH 13/13] packaging: drop bazel clean --expunge from pre_build_script.sh

---
 packaging/pre_build_script.sh | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
index 5acd980e17..3bd1dbe6f1 100755
--- a/packaging/pre_build_script.sh
+++ b/packaging/pre_build_script.sh
@@ -82,11 +82,6 @@ fi
 
 cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel
 
-# Clear any stale Bazel external-repo cache so the freshly-generated MODULE.bazel
-# is evaluated against the current third_party/libtorch/BUILD, not a cached copy
-# from a previous build or a pre-baked container image.
-bazel clean --expunge || true
-
 if [[ ${TENSORRT_VERSION} != "" ]]; then
     sed -i -e "s/strip_prefix = \"TensorRT-.*\"/strip_prefix = \"${TENSORRT_STRIP_PREFIX}\"/g" MODULE.bazel
     sed -i -e "s#\"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/.*\"#\"${TENSORRT_URLS}\"#g" MODULE.bazel