Skip to content

Commit 84b66a8

Browse files
author
shoumikhin
committed
[executorch][nvidia][tensorrt][12/n] Add C++ runtime infrastructure
Add complete C++ runtime infrastructure for TensorRT backend. Differential Revision: [D93275039](https://our.internmc.facebook.com/intern/diff/D93275039/) [ghstack-poisoned]
1 parent 4035db7 commit 84b66a8

12 files changed

Lines changed: 1275 additions & 1 deletion

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,11 @@ if(EXECUTORCH_BUILD_METAL)
716716
list(APPEND _executorch_backends metal_backend)
717717
endif()
718718

719+
# Wire the NVIDIA TensorRT backend into the build when requested.
if(EXECUTORCH_BUILD_TENSORRT)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/nvidia/tensorrt)
  list(APPEND _executorch_backends tensorrt_backend)
endif()
719724
if(EXECUTORCH_BUILD_EXTENSION_APPLE)
720725
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
721726
endif()
@@ -983,6 +988,10 @@ if(EXECUTORCH_BUILD_PYBIND)
983988
list(APPEND _dep_libs vulkan_backend)
984989
endif()
985990

991+
# The pybind extension must link the TensorRT backend when it is enabled.
if(EXECUTORCH_BUILD_TENSORRT)
  list(APPEND _dep_libs tensorrt_backend)
endif()
986995
# compile options for pybind
987996
set(_pybind_compile_options
988997
$<$<CXX_COMPILER_ID:MSVC>:/EHsc

backends/nvidia/tensorrt/CMakeLists.txt

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,127 @@ if(NOT EXECUTORCH_ROOT)
2727
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
2828
endif()
2929

30-
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
30+
# When built as part of the main ExecuTorch CMake, Utils.cmake is already included.
# Only include it if executorch_target_link_options_shared_lib is not defined.
if(NOT COMMAND executorch_target_link_options_shared_lib)
  if(EXISTS ${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
    include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
  else()
    # Define a no-op fallback for standalone builds
    function(executorch_target_link_options_shared_lib target)
      # No-op: whole-archive linking is handled separately for the runner
    endfunction()
  endif()
endif()

if(EXECUTORCH_BUILD_TENSORRT)
  # Prefer an installed TensorRT CMake package when one is available.
  find_package(TensorRT QUIET)

  if(NOT TensorRT_FOUND)
    # Fall back to a manual search rooted at TENSORRT_HOME / TENSORRT_DIR
    # (CMake variable first, then environment variables).
    if(DEFINED TENSORRT_HOME)
      set(TENSORRT_ROOT ${TENSORRT_HOME})
    elseif(DEFINED ENV{TENSORRT_HOME})
      set(TENSORRT_ROOT $ENV{TENSORRT_HOME})
    elseif(DEFINED ENV{TENSORRT_DIR})
      set(TENSORRT_ROOT $ENV{TENSORRT_DIR})
    else()
      # Default to /usr for system installations (e.g., JetPack on Jetson)
      set(TENSORRT_ROOT "/usr")
    endif()

    message(STATUS "Looking for TensorRT in: ${TENSORRT_ROOT}")

    # Find TensorRT headers (supports both x86_64 and aarch64/Jetson)
    find_path(
      TENSORRT_INCLUDE_DIR NvInfer.h
      PATHS ${TENSORRT_ROOT}/include
            ${TENSORRT_ROOT}/include/aarch64-linux-gnu
            ${TENSORRT_ROOT}/include/x86_64-linux-gnu
      PATH_SUFFIXES tensorrt
    )

    # Find TensorRT library (supports both x86_64 and aarch64/Jetson)
    find_library(
      TENSORRT_LIBRARY nvinfer
      PATHS ${TENSORRT_ROOT}/lib
            ${TENSORRT_ROOT}/lib/aarch64-linux-gnu
            ${TENSORRT_ROOT}/lib/x86_64-linux-gnu
            ${TENSORRT_ROOT}/lib64
    )

    if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
      message(STATUS "Found TensorRT include: ${TENSORRT_INCLUDE_DIR}")
      message(STATUS "Found TensorRT library: ${TENSORRT_LIBRARY}")

      # Get the directory containing the TensorRT library and add it to the
      # link directories so dependent targets resolve companion libraries.
      get_filename_component(TENSORRT_LIB_DIR ${TENSORRT_LIBRARY} DIRECTORY)
      message(STATUS "TensorRT library directory: ${TENSORRT_LIB_DIR}")
      link_directories(${TENSORRT_LIB_DIR})
    endif()
  endif()

  # Verify TensorRT was found. The manual fallback must provide BOTH the
  # header and the library: previously a library without headers passed this
  # check and the build failed later with an opaque missing-NvInfer.h error.
  if(NOT TensorRT_FOUND AND NOT (TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY))
    message(FATAL_ERROR
            "TensorRT not found. Set TENSORRT_HOME or TENSORRT_DIR environment variable, "
            "or pass -DTENSORRT_HOME=/path/to/tensorrt to CMake.")
  endif()

  # The executor uses the CUDA runtime for device buffers and streams.
  find_package(CUDAToolkit REQUIRED)

  # Define common include directories (used by backend and runner/test binaries)
  set(TENSORRT_COMMON_INCLUDE_DIRS ${EXECUTORCH_ROOT}/..)

  # TensorRT backend static library
  add_library(tensorrt_backend STATIC)

  # TensorRT's C++ API requires exceptions and RTTI even though the core
  # ExecuTorch runtime builds without them.
  target_compile_options(tensorrt_backend PRIVATE -frtti -fexceptions)

  target_include_directories(
    tensorrt_backend
    PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
           $<BUILD_INTERFACE:${TENSORRT_COMMON_INCLUDE_DIRS}>
           $<INSTALL_INTERFACE:include>
  )

  if(TENSORRT_INCLUDE_DIR)
    target_include_directories(
      tensorrt_backend PUBLIC $<BUILD_INTERFACE:${TENSORRT_INCLUDE_DIR}>
    )
  endif()

  # Add source files
  target_sources(
    tensorrt_backend
    PRIVATE ${CMAKE_CURRENT_LIST_DIR}/runtime/TensorRTBackend.cpp
            ${CMAKE_CURRENT_LIST_DIR}/runtime/TensorRTExecutor.cpp
  )

  # Link dependencies
  target_link_libraries(tensorrt_backend PUBLIC executorch_core CUDA::cudart)

  # Prefer the explicitly located library path; otherwise use the imported
  # target exported by the TensorRT package config.
  if(TENSORRT_LIBRARY)
    target_link_libraries(tensorrt_backend PUBLIC ${TENSORRT_LIBRARY})
  elseif(TensorRT_FOUND)
    target_link_libraries(tensorrt_backend PUBLIC TensorRT::nvinfer)
  endif()

  # Force link the whole library to ensure static registration works
  executorch_target_link_options_shared_lib(tensorrt_backend)

  # Install TensorRT backend library
  install(
    TARGETS tensorrt_backend
    EXPORT ExecuTorchTargets
    DESTINATION ${CMAKE_INSTALL_LIBDIR}
    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  )
endif()

backends/nvidia/tensorrt/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,14 @@ The TensorRT delegate uses a custom binary blob format:
106106
- CUDA Toolkit 11.x or 12.x
107107
- cuDNN 8.x
108108
- PyTorch 2.x with CUDA support (for export)
109+
110+
## Build Instructions

```bash
cd executorch
mkdir -p cmake-out && cd cmake-out

cmake .. -DEXECUTORCH_BUILD_TENSORRT=ON

cmake --build . --target tensorrt_backend tensorrt_executor_runner
```
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Buck build file for the NVIDIA TensorRT backend runtime; the actual target
# definitions live in targets.bzl.
load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/nvidia/tensorrt/runtime/TensorRTBackend.h>

#include <new>
#include <vector>

#include <executorch/backends/nvidia/tensorrt/runtime/TensorRTBlobHeader.h>
#include <executorch/backends/nvidia/tensorrt/runtime/TensorRTExecutor.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/platform/log.h>
14+
15+
namespace executorch {
16+
namespace backends {
17+
namespace tensorrt {
18+
19+
using executorch::runtime::ArrayRef;
20+
using executorch::runtime::Backend;
21+
using executorch::runtime::BackendExecutionContext;
22+
using executorch::runtime::BackendInitContext;
23+
using executorch::runtime::CompileSpec;
24+
using executorch::runtime::DelegateHandle;
25+
using executorch::runtime::Error;
26+
using executorch::runtime::EValue;
27+
using executorch::runtime::FreeableBuffer;
28+
using executorch::runtime::MemoryAllocator;
29+
using executorch::runtime::register_backend;
30+
using executorch::runtime::Result;
31+
using executorch::runtime::Span;
32+
33+
namespace {
34+
35+
bool is_tensorrt_available() {
36+
return true;
37+
}
38+
39+
} // namespace
40+
41+
bool TensorRTBackend::is_available() const {
42+
return is_tensorrt_available();
43+
}
44+
45+
// Parses the processed delegate blob, allocates a TensorRTExecutor from the
// method's runtime allocator, and initializes it from the blob payload.
// Returns the executor as an opaque DelegateHandle on success; the processed
// buffer is released once the executor has consumed it.
Result<DelegateHandle*> TensorRTBackend::init(
    BackendInitContext& context,
    FreeableBuffer* processed,
    ArrayRef<CompileSpec> compile_specs) const {
  (void)compile_specs;

  // Refuse to initialize when the backend reports itself unavailable.
  if (!is_available()) {
    ET_LOG(Error, "TensorRT backend is not available");
    return Error::NotSupported;
  }

  // The lowered program must ship a non-empty payload for this delegate.
  if (processed == nullptr || processed->data() == nullptr) {
    ET_LOG(Error, "Invalid processed buffer");
    return Error::InvalidArgument;
  }

  const void* payload = processed->data();
  const size_t payload_size = processed->size();

  // Validate the custom blob format up front so a corrupt payload fails
  // cleanly instead of inside the executor.
  TensorRTBlobHeader blob_header{};
  if (!parse_blob_header(payload, payload_size, blob_header)) {
    ET_LOG(Error, "Failed to parse TensorRT blob header");
    return Error::InvalidArgument;
  }

  MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
  if (runtime_allocator == nullptr) {
    ET_LOG(Error, "Failed to get runtime allocator");
    return Error::InvalidState;
  }

  // Arena-allocate the executor: the allocator owns the storage, so only the
  // destructor (not a delete) runs on teardown — see destroy().
  TensorRTExecutor* executor =
      runtime_allocator->allocateInstance<TensorRTExecutor>();
  if (executor == nullptr) {
    ET_LOG(Error, "Failed to allocate TensorRT executor");
    return Error::MemoryAllocationFailed;
  }
  new (executor) TensorRTExecutor();

  const Error status = executor->initialize(payload, payload_size);
  if (status != Error::Ok) {
    ET_LOG(Error, "Failed to initialize TensorRT executor");
    // Destruct in place; the arena reclaims the storage itself.
    executor->~TensorRTExecutor();
    return status;
  }

  // The executor has consumed the blob; free the processed buffer early to
  // reduce peak memory.
  processed->Free();

  return static_cast<DelegateHandle*>(executor);
}
97+
98+
// Runs one inference through the delegate. Collects the raw data pointers of
// the tensor args, splits them into input and output buffer lists, and hands
// them to the executor.
Error TensorRTBackend::execute(
    BackendExecutionContext& context,
    DelegateHandle* handle,
    Span<EValue*> args) const {
  (void)context;

  if (handle == nullptr) {
    ET_LOG(Error, "Invalid delegate handle");
    return Error::InvalidArgument;
  }

  auto* executor = static_cast<TensorRTExecutor*>(handle);
  if (!executor->is_initialized()) {
    ET_LOG(Error, "Executor not initialized");
    return Error::InvalidState;
  }

  const size_t num_inputs = executor->get_num_inputs();
  const size_t num_outputs = executor->get_num_outputs();
  if (num_inputs + num_outputs == 0) {
    ET_LOG(Error, "No inputs or outputs found");
    return Error::InvalidState;
  }

  // Partition the tensor args by position: the first num_inputs tensors are
  // treated as inputs, the rest as outputs. Non-tensor args are skipped.
  // NOTE(review): assumes the method's args list orders inputs before
  // outputs — confirm against the delegate call convention.
  std::vector<void*> input_buffers;
  std::vector<void*> output_buffers;
  input_buffers.reserve(num_inputs);
  output_buffers.reserve(num_outputs);

  size_t seen_tensors = 0;
  for (size_t idx = 0; idx < args.size(); ++idx) {
    EValue* value = args[idx];
    if (value == nullptr || !value->isTensor()) {
      continue;
    }

    ::executorch::aten::Tensor tensor = value->toTensor();
    void* buffer = tensor.mutable_data_ptr();

    if (seen_tensors < num_inputs) {
      input_buffers.push_back(buffer);
    } else {
      output_buffers.push_back(buffer);
    }
    ++seen_tensors;
  }

  // Both partitions must match what the engine expects exactly.
  if (input_buffers.size() != num_inputs) {
    ET_LOG(
        Error,
        "Input buffer count mismatch: expected %zu, got %zu",
        num_inputs,
        input_buffers.size());
    return Error::InvalidArgument;
  }
  if (output_buffers.size() != num_outputs) {
    ET_LOG(
        Error,
        "Output buffer count mismatch: expected %zu, got %zu",
        num_outputs,
        output_buffers.size());
    return Error::InvalidArgument;
  }

  return executor->execute(
      input_buffers.data(),
      input_buffers.size(),
      output_buffers.data(),
      output_buffers.size());
}
171+
172+
// Tears down an executor created by init(). Only the destructor is run: the
// storage belongs to the runtime allocator arena and is reclaimed with it.
void TensorRTBackend::destroy(DelegateHandle* handle) const {
  if (handle == nullptr) {
    return;
  }
  static_cast<TensorRTExecutor*>(handle)->~TensorRTExecutor();
}
178+
179+
} // namespace tensorrt
180+
} // namespace backends
181+
} // namespace executorch
182+
183+
namespace {
184+
executorch::backends::tensorrt::TensorRTBackend& get_backend() {
185+
static executorch::backends::tensorrt::TensorRTBackend backend;
186+
return backend;
187+
}
188+
const executorch::runtime::Backend backend_id{"TensorRTBackend", &get_backend()};
189+
const auto registered = executorch::runtime::register_backend(backend_id);
190+
} // namespace

0 commit comments

Comments
 (0)