Commit 353a33a

Author: shoumikhin

[executorch][nvidia][tensorrt][13/n] Add examples, C++ runner and CI workflow

Adds a complete C++ runner example for TensorRT-accelerated model inference, and sets up a GitHub Actions CI workflow for automated builds and tests on NVIDIA GPUs.

Differential Revision: [D93275050](https://our.internmc.facebook.com/intern/diff/D93275050/)

[ghstack-poisoned]

1 parent: 84b66a8

21 files changed: 2,127 additions & 16 deletions


.github/workflows/tensorrt.yml

Lines changed: 236 additions & 0 deletions
@@ -0,0 +1,236 @@
# Test ExecuTorch TensorRT Backend
#
# This workflow tests the TensorRT backend for NVIDIA GPU acceleration.
# It exports models using the TensorRT partitioner and runs them using
# both Python and C++ runners.
#
# Requirements:
#   - NVIDIA GPU with TensorRT support
#   - TensorRT SDK (pip install tensorrt>=10.3)
#   - CUDA toolkit

name: Test TensorRT Backend

on:
  pull_request:
    paths:
      - backends/nvidia/tensorrt/**
      - examples/nvidia/tensorrt/**
      - .github/workflows/tensorrt.yml
  push:
    branches:
      - main
      - release/*
    paths:
      - backends/nvidia/tensorrt/**
      - examples/nvidia/tensorrt/**
  workflow_dispatch:
  schedule:
    # Run daily at 3 AM UTC (after CUDA workflow at 2 AM)
    - cron: '0 3 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  # Test that the TensorRT backend builds correctly
  test-tensorrt-build:
    name: test-tensorrt-build
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Install TensorRT"
        pip install tensorrt onnx
        echo "::endgroup::"

        echo "::group::Install ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Build TensorRT Backend"
        # Build with TensorRT support
        cmake -S . -B cmake-out \
          -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_TENSORRT=ON \
          -DPYTHON_EXECUTABLE=python

        cmake --build cmake-out --target tensorrt_backend tensorrt_executor_runner -j$(nproc)
        echo "::endgroup::"

        echo "::group::Verify Build Artifacts"
        ls -la cmake-out/backends/nvidia/tensorrt/
        test -f cmake-out/backends/nvidia/tensorrt/libtensorrt_backend.a
        test -f cmake-out/backends/nvidia/tensorrt/tensorrt_executor_runner
        echo "Build verification passed!"
        echo "::endgroup::"

  # Test model export and Python execution
  test-models-tensorrt-python:
    name: test-models-tensorrt-python
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        model: [add, mul, linear]
    with:
      timeout: 60
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Install TensorRT"
        pip install tensorrt onnx
        echo "::endgroup::"

        echo "::group::Install ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Export ${{ matrix.model }} model with TensorRT"
        python -m executorch.examples.nvidia.tensorrt.export -m ${{ matrix.model }}
        test -f ${{ matrix.model }}_tensorrt.pte
        echo "Model exported successfully!"
        echo "::endgroup::"

        echo "::group::Run ${{ matrix.model }} model with Python runner"
        python -m executorch.examples.nvidia.tensorrt.runner \
          --model_path=${{ matrix.model }}_tensorrt.pte
        echo "Python execution completed!"
        echo "::endgroup::"

  # Test model export and C++ execution
  test-models-tensorrt-cpp:
    name: test-models-tensorrt-cpp
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        model: [add, mul, linear]
    with:
      timeout: 60
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Install TensorRT"
        pip install tensorrt onnx
        echo "::endgroup::"

        echo "::group::Install ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Build TensorRT Backend and Runner"
        cmake -S . -B cmake-out \
          -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_TENSORRT=ON \
          -DPYTHON_EXECUTABLE=python

        cmake --build cmake-out --target tensorrt_executor_runner -j$(nproc)
        echo "::endgroup::"

        echo "::group::Export ${{ matrix.model }} model"
        python -m executorch.examples.nvidia.tensorrt.export -m ${{ matrix.model }}
        test -f ${{ matrix.model }}_tensorrt.pte
        echo "::endgroup::"

        echo "::group::Run ${{ matrix.model }} model with C++ runner"
        RUNNER_PATH="./cmake-out/backends/nvidia/tensorrt/tensorrt_executor_runner"
        if [ ! -f "$RUNNER_PATH" ]; then
          # Fallback: search for the runner binary
          RUNNER_PATH=$(find ./cmake-out -name tensorrt_executor_runner -type f | head -1)
        fi
        $RUNNER_PATH \
          --model_path=${{ matrix.model }}_tensorrt.pte \
          --verbose
        echo "C++ execution completed!"
        echo "::endgroup::"

  # Run TensorRT backend unit tests
  unittest-tensorrt:
    name: unittest-tensorrt
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 60
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux

        echo "::group::Install TensorRT"
        pip install tensorrt onnx
        echo "::endgroup::"

        echo "::group::Install ExecuTorch"
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Run TensorRT Backend Unit Tests"
        # Run all test_*.py files in the backend test directory.
        # The -o "addopts=" override prevents pytest.ini from injecting
        # flags that would run unrelated test suites.
        python -m pytest backends/nvidia/tensorrt/test/ -v -o "addopts="
        echo "::endgroup::"

  # Summary job to check all tests passed
  check-all-tensorrt-tests:
    needs: [test-tensorrt-build, test-models-tensorrt-python, test-models-tensorrt-cpp, unittest-tensorrt]
    # All four jobs must succeed for the overall check to pass.
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Check if all TensorRT tests succeeded
        run: |
          if [[ "${{ needs.test-tensorrt-build.result }}" != "success" ]]; then
            echo "ERROR: TensorRT build test failed!"
            exit 1
          fi
          if [[ "${{ needs.test-models-tensorrt-python.result }}" != "success" ]]; then
            echo "ERROR: TensorRT Python model tests failed!"
            exit 1
          fi
          if [[ "${{ needs.test-models-tensorrt-cpp.result }}" != "success" ]]; then
            echo "ERROR: TensorRT C++ model tests failed!"
            exit 1
          fi
          if [[ "${{ needs.unittest-tensorrt.result }}" != "success" ]]; then
            echo "ERROR: TensorRT unit tests failed!"
            exit 1
          fi
          echo "SUCCESS: All TensorRT backend tests passed!"

backends/nvidia/tensorrt/CMakeLists.txt

Lines changed: 65 additions & 0 deletions
@@ -150,4 +150,69 @@ if(EXECUTORCH_BUILD_TENSORRT)
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  )

  # TensorRT executor runner binary
  # This binary links the TensorRT backend with the ExecuTorch runtime
  # to run exported .pte files on NVIDIA GPUs.
  #
  # Build:
  #   cmake -DEXECUTORCH_BUILD_TENSORRT=ON ...
  #   cmake --build . --target tensorrt_executor_runner
  #
  # Usage:
  #   ./tensorrt_executor_runner --model_path=model_tensorrt.pte
  add_executable(
    tensorrt_executor_runner
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../examples/nvidia/tensorrt/tensorrt_executor_runner.cpp
  )

  target_include_directories(
    tensorrt_executor_runner
    PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}>
           $<BUILD_INTERFACE:${TENSORRT_COMMON_INCLUDE_DIRS}>
  )

  # Note: We need WHOLE_ARCHIVE for tensorrt_backend to ensure static
  # registration of the backend runs. This is necessary because the backend
  # registers itself via a global constructor.
  #
  # We use target_link_options to ensure the whole-archive flags are placed
  # correctly around the library in the link command. CMake's
  # target_link_libraries with string flags may not preserve the correct order.
  target_link_libraries(
    tensorrt_executor_runner
    PRIVATE executorch extension_data_loader extension_runner_util portable_kernels
  )

  # Apply whole-archive linking for tensorrt_backend via link options.
  # This ensures the static backend registration is included.
  if(APPLE)
    target_link_options(
      tensorrt_executor_runner
      PRIVATE "SHELL:LINKER:-force_load,$<TARGET_FILE:tensorrt_backend>"
    )
  else()
    target_link_options(
      tensorrt_executor_runner
      PRIVATE
        "SHELL:LINKER:--whole-archive $<TARGET_FILE:tensorrt_backend> LINKER:--no-whole-archive"
    )
  endif()

  # Add tensorrt_backend's transitive dependencies (CUDA, TensorRT).
  # We can't use tensorrt_backend directly in target_link_libraries because
  # it would be linked twice (once via whole-archive, once normally).
  target_link_libraries(tensorrt_executor_runner PRIVATE CUDA::cudart)
  if(TENSORRT_LIBRARY)
    target_link_libraries(tensorrt_executor_runner PRIVATE ${TENSORRT_LIBRARY})
  elseif(TensorRT_FOUND)
    target_link_libraries(tensorrt_executor_runner PRIVATE TensorRT::nvinfer)
  endif()

  # Ensure tensorrt_backend is built before the runner
  add_dependencies(tensorrt_executor_runner tensorrt_backend)

  target_compile_options(tensorrt_executor_runner PRIVATE -frtti -fexceptions)

  install(TARGETS tensorrt_executor_runner DESTINATION ${CMAKE_INSTALL_BINDIR})

endif()
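
The whole-archive linking above exists because of a standard C++ pattern: the backend registers itself from the initializer of a global object, and nothing in the runner references any symbol in that object file, so a plain link against the static archive would simply drop it. A self-contained toy illustration of the pattern (not ExecuTorch's actual registration API; every name here is made up):

// Toy model of static backend registration; hypothetical names throughout,
// standing in for the real registration code inside libtensorrt_backend.a.
#include <cstdio>

namespace {

// Stand-in for a runtime registry call (e.g., an ExecuTorch-style backend
// registry that the runtime consults when it meets a delegated payload).
bool register_backend(const char* name) {
  std::printf("registered backend: %s\n", name);
  return true;
}

// A global whose initializer runs before main(). If this translation unit
// sits in a static library and the executable references none of its
// symbols, the linker omits the whole object file and this initializer
// never runs: the backend silently fails to register. Forcing the archive
// in with LINKER:--whole-archive (or -force_load on macOS), as the
// target_link_options above do, keeps the object file and hence the
// registration.
[[maybe_unused]] const bool kRegistered = register_backend("TensorRTBackend");

} // namespace

int main() {
  // With registration guaranteed, the runtime can find the backend by name
  // when loading a delegated .pte program.
  return 0;
}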
