Skip to content

Commit 8d59e04

Browse files
committed
Update on "[ET Device Support] Schema changes: device info on Tensor and buffer-level device array"
This diff adds device placement information to the ExecuTorch schema so that the device type can be represented per tensor; the upcoming tensor_parser updates depend on this. It is part of the Phase 1 implementation that makes ET device types work end to end (E2E) without user-specified device placement. Design doc: https://docs.google.com/document/d/1lwd9BlohmwkN5EEvRulO_b-XnZBwv1nMb5l2K3jfuwA/edit?tab=t.0#heading=h.o6anuvkix4bu Differential Revision: [D93635657](https://our.internmc.facebook.com/intern/diff/D93635657/) [ghstack-poisoned]
2 parents 98edf6a + c1c701b commit 8d59e04

25 files changed

Lines changed: 1087 additions & 176 deletions

.ci/scripts/export_model_artifact.sh

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- nvidia/diar_streaming_sortformer_4spk-v2
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
2829
quant_name Quantization type (optional, default: non-quantized)
2930
Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
167168
PREPROCESSOR_FEATURE_SIZE=""
168169
PREPROCESSOR_OUTPUT=""
169170
;;
171+
facebook/dinov2-small-imagenet1k-1-layer)
172+
MODEL_NAME="dinov2"
173+
TASK=""
174+
MAX_SEQ_LEN=""
175+
EXTRA_PIP=""
176+
PREPROCESSOR_FEATURE_SIZE=""
177+
PREPROCESSOR_OUTPUT=""
178+
;;
170179
mistralai/Voxtral-Mini-4B-Realtime-2602)
171180
MODEL_NAME="voxtral_realtime"
172181
TASK=""
@@ -177,7 +186,7 @@ case "$HF_MODEL" in
177186
;;
178187
*)
179188
echo "Error: Unsupported model '$HF_MODEL'"
180-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
189+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
181190
exit 1
182191
;;
183192
esac
@@ -293,6 +302,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
293302
exit 0
294303
fi
295304

305+
# DINOv2 uses a custom export script
306+
if [ "$MODEL_NAME" = "dinov2" ]; then
307+
pip install -r examples/models/dinov2/install_requirements.txt
308+
309+
python -m executorch.examples.models.dinov2.export_dinov2 \
310+
--backend "$DEVICE" \
311+
--output-dir "${OUTPUT_DIR}"
312+
313+
test -f "${OUTPUT_DIR}/model.pte"
314+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
315+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
316+
fi
317+
ls -al "${OUTPUT_DIR}"
318+
echo "::endgroup::"
319+
exit 0
320+
fi
321+
296322
# Voxtral Realtime uses a custom export script
297323
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
298324
pip install safetensors huggingface_hub

.ci/scripts/test_model_e2e.sh

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- Qwen/Qwen3-0.6B
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
- mistralai/Voxtral-Mini-4B-Realtime-2602
2829
2930
quant_name Quantization type (required)
@@ -190,6 +191,19 @@ case "$HF_MODEL" in
190191
AUDIO_FILE="poem.wav"
191192
IMAGE_PATH=""
192193
;;
194+
facebook/dinov2-small-imagenet1k-1-layer)
195+
MODEL_NAME="dinov2"
196+
RUNNER_TARGET="dinov2_runner"
197+
RUNNER_PATH="dinov2"
198+
EXPECTED_OUTPUT="Samoyed"
199+
PREPROCESSOR=""
200+
TOKENIZER_URL=""
201+
TOKENIZER_FILE=""
202+
AUDIO_URL=""
203+
AUDIO_FILE=""
204+
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
205+
IMAGE_PATH=""
206+
;;
193207
mistralai/Voxtral-Mini-4B-Realtime-2602)
194208
MODEL_NAME="voxtral_realtime"
195209
RUNNER_TARGET="voxtral_realtime_runner"
@@ -204,7 +218,7 @@ case "$HF_MODEL" in
204218
;;
205219
*)
206220
echo "Error: Unsupported model '$HF_MODEL'"
207-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
221+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
208222
exit 1
209223
;;
210224
esac
@@ -218,7 +232,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
218232

219233

220234
# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
221-
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ]; then
235+
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
222236
if [ "$TOKENIZER_FILE" != "" ]; then
223237
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
224238
else
@@ -238,6 +252,11 @@ elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ];
238252
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
239253
fi
240254

255+
# Download test image for vision models
256+
if [ -n "${IMAGE_URL:-}" ]; then
257+
curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
258+
fi
259+
241260
ls -al
242261
echo "::endgroup::"
243262

@@ -316,6 +335,12 @@ EOF
316335
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
317336
fi
318337
;;
338+
dinov2)
339+
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
340+
if [ "$DEVICE" = "cuda" ]; then
341+
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
342+
fi
343+
;;
319344
voxtral_realtime)
320345
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
321346
# Add CUDA data path if present

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ param(
1313
[Parameter(Mandatory = $true)]
1414
[string]$QuantName,
1515
[string]$ModelDir = ".",
16-
[string]$ExpectedCudaVersion = ""
16+
[string]$ExpectedCudaVersion = "",
17+
[string]$Mode = ""
1718
)
1819

1920
Set-StrictMode -Version Latest
@@ -25,6 +26,15 @@ if ($Device -ne "cuda-windows") {
2526
throw "Unsupported device '$Device'. Expected 'cuda-windows'."
2627
}
2728

29+
if ($Mode -ne "") {
30+
if ($Mode -notin @("vr-streaming", "vr-offline")) {
31+
throw "Unsupported mode '$Mode'. Supported modes: vr-streaming, vr-offline"
32+
}
33+
if ($HfModel -ne "mistralai/Voxtral-Mini-4B-Realtime-2602") {
34+
throw "Mode '$Mode' can only be used with Voxtral Realtime model"
35+
}
36+
}
37+
2838
Write-Host "Testing model: $HfModel (quantization: $QuantName)"
2939

3040
$resolvedModelDir = (Resolve-Path -Path $ModelDir).Path
@@ -79,15 +89,28 @@ switch ($HfModel) {
7989
$runnerTarget = "voxtral_realtime_runner"
8090
$runnerPath = "voxtral_realtime"
8191
$runnerPreset = "voxtral-realtime-cuda"
82-
$expectedOutput = "Loading audio from"
92+
$expectedOutput = "Quilter"
8393
$preprocessor = "preprocessor.pte"
8494
$tokenizerUrl = ""
8595
$tokenizerFile = "tekken.json"
8696
$audioUrl = "https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
8797
$audioFile = "poem.wav"
8898
}
99+
"facebook/dinov2-small-imagenet1k-1-layer" {
100+
$runnerTarget = "dinov2_runner"
101+
$runnerPath = "dinov2"
102+
$runnerPreset = "dinov2-cuda"
103+
$expectedOutput = "Samoyed"
104+
$preprocessor = ""
105+
$tokenizerUrl = ""
106+
$tokenizerFile = ""
107+
$audioUrl = ""
108+
$audioFile = ""
109+
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
110+
$imageFile = "test_image.jpg"
111+
}
89112
default {
90-
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
113+
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
91114
}
92115
}
93116

@@ -162,6 +185,9 @@ try {
162185
if ($audioUrl -ne "") {
163186
Download-IfNeeded -Url $audioUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $audioFile)
164187
}
188+
if ((Get-Variable -Name imageUrl -ErrorAction SilentlyContinue) -and $imageUrl -ne "") {
189+
Download-IfNeeded -Url $imageUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
190+
}
165191
Get-ChildItem -Path $resolvedModelDir
166192
Write-Host "::endgroup::"
167193

@@ -207,6 +233,16 @@ try {
207233
"--audio_path", (Join-Path -Path $resolvedModelDir -ChildPath $audioFile),
208234
"--preprocessor_path", (Join-Path -Path $resolvedModelDir -ChildPath $preprocessor)
209235
)
236+
if ($Mode -ne "vr-offline") {
237+
$runnerArgs += "--streaming"
238+
}
239+
}
240+
"facebook/dinov2-small-imagenet1k-1-layer" {
241+
$runnerArgs = @(
242+
"--model_path", $modelPte,
243+
"--data_path", $cudaBlob,
244+
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
245+
)
210246
}
211247
}
212248

.github/workflows/cuda-windows.yml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ jobs:
4747
- model_repo: "mistralai"
4848
model_name: "Voxtral-Mini-4B-Realtime-2602"
4949
quant: "quantized-int4-tile-packed"
50+
- model_repo: "facebook"
51+
model_name: "dinov2-small-imagenet1k-1-layer"
52+
quant: "non-quantized"
5053
with:
5154
timeout: 90
5255
secrets-env: EXECUTORCH_HF_TOKEN
@@ -83,12 +86,15 @@ jobs:
8386
PYTHON_EXECUTABLE=python ./install_executorch.sh
8487
echo "::endgroup::"
8588
86-
echo "::group::Setup Huggingface"
87-
pip install -U "huggingface_hub[cli]<1.0" accelerate
88-
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
89-
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
90-
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
91-
echo "::endgroup::"
89+
# Setup Huggingface only for models that need it (not dinov2)
90+
if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
91+
echo "::group::Setup Huggingface"
92+
pip install -U "huggingface_hub[cli]<1.0" accelerate
93+
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
94+
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
95+
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
96+
echo "::endgroup::"
97+
fi
9298
9399
VR_MODE=""
94100
if [ "${{ matrix.model_name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
@@ -122,6 +128,9 @@ jobs:
122128
- model_repo: "mistralai"
123129
model_name: "Voxtral-Mini-4B-Realtime-2602"
124130
quant: "quantized-int4-tile-packed"
131+
- model_repo: "facebook"
132+
model_name: "dinov2-small-imagenet1k-1-layer"
133+
quant: "non-quantized"
125134
with:
126135
timeout: 240
127136
runner: windows.g5.4xlarge.nvidia.gpu

.github/workflows/cuda.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ jobs:
151151
name: "Qwen3-0.6B"
152152
- repo: "nvidia"
153153
name: "parakeet-tdt"
154+
- repo: "facebook"
155+
name: "dinov2-small-imagenet1k-1-layer"
154156
quant:
155157
- "non-quantized"
156158
- "quantized-int4-tile-packed"
@@ -179,6 +181,15 @@ jobs:
179181
repo: "nvidia"
180182
name: "diar_streaming_sortformer_4spk-v2"
181183
quant: "quantized-int4-weight-only"
184+
# DINOv2 currently supports only non-quantized export
185+
- model:
186+
repo: "facebook"
187+
name: "dinov2-small-imagenet1k-1-layer"
188+
quant: "quantized-int4-tile-packed"
189+
- model:
190+
repo: "facebook"
191+
name: "dinov2-small-imagenet1k-1-layer"
192+
quant: "quantized-int4-weight-only"
182193
with:
183194
timeout: 90
184195
secrets-env: EXECUTORCH_HF_TOKEN
@@ -198,8 +209,8 @@ jobs:
198209
./install_executorch.sh
199210
echo "::endgroup::"
200211
201-
# Setup Huggingface only for models that need it (not parakeet)
202-
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ]; then
212+
# Setup Huggingface only for models that need it (not parakeet or dinov2)
213+
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
203214
echo "::group::Setup Huggingface"
204215
pip install -U "huggingface_hub[cli]<1.0" accelerate
205216
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
@@ -235,6 +246,8 @@ jobs:
235246
name: "gemma-3-4b-it"
236247
- repo: "nvidia"
237248
name: "parakeet-tdt"
249+
- repo: "facebook"
250+
name: "dinov2-small-imagenet1k-1-layer"
238251
quant:
239252
- "non-quantized"
240253
- "quantized-int4-tile-packed"
@@ -263,6 +276,15 @@ jobs:
263276
repo: "nvidia"
264277
name: "diar_streaming_sortformer_4spk-v2"
265278
quant: "quantized-int4-weight-only"
279+
# DINOv2 currently supports only non-quantized export
280+
- model:
281+
repo: "facebook"
282+
name: "dinov2-small-imagenet1k-1-layer"
283+
quant: "quantized-int4-tile-packed"
284+
- model:
285+
repo: "facebook"
286+
name: "dinov2-small-imagenet1k-1-layer"
287+
quant: "quantized-int4-weight-only"
266288
with:
267289
timeout: 90
268290
runner: linux.g5.4xlarge.nvidia.gpu

Makefile

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
#
9292
# ==============================================================================
9393

94-
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
94+
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
9595

9696
help:
9797
@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
@@ -109,6 +109,8 @@ help:
109109
@echo " parakeet-cuda-debug - Build Parakeet runner with CUDA backend (debug mode)"
110110
@echo " parakeet-cpu - Build Parakeet runner with CPU backend"
111111
@echo " parakeet-metal - Build Parakeet runner with Metal backend (macOS only)"
112+
@echo " dinov2-cuda - Build DINOv2 runner with CUDA backend"
113+
@echo " dinov2-cuda-debug - Build DINOv2 runner with CUDA backend (debug mode)"
112114
@echo " sortformer-cuda - Build Sortformer runner with CUDA backend"
113115
@echo " sortformer-cpu - Build Sortformer runner with CPU backend"
114116
@echo " silero-vad-cpu - Build Silero VAD runner with CPU backend"
@@ -219,6 +221,24 @@ parakeet-metal:
219221
@echo "✓ Build complete!"
220222
@echo " Binary: cmake-out/examples/models/parakeet/parakeet_runner"
221223

224+
dinov2-cuda:
225+
@echo "==> Building and installing ExecuTorch with CUDA..."
226+
cmake --workflow --preset llm-release-cuda
227+
@echo "==> Building DINOv2 runner with CUDA..."
228+
cd examples/models/dinov2 && cmake --workflow --preset dinov2-cuda
229+
@echo ""
230+
@echo "✓ Build complete!"
231+
@echo " Binary: cmake-out/examples/models/dinov2/dinov2_runner"
232+
233+
dinov2-cuda-debug:
234+
@echo "==> Building and installing ExecuTorch with CUDA (debug mode)..."
235+
cmake --workflow --preset llm-debug-cuda
236+
@echo "==> Building DINOv2 runner with CUDA (debug mode)..."
237+
cd examples/models/dinov2 && cmake --workflow --preset dinov2-cuda-debug
238+
@echo ""
239+
@echo "✓ Build complete!"
240+
@echo " Binary: cmake-out/examples/models/dinov2/dinov2_runner"
241+
222242
sortformer-cuda:
223243
@echo "==> Building and installing ExecuTorch with CUDA..."
224244
cmake --workflow --preset llm-release-cuda

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
from .fuse_equal_placeholders_pass import FuseEqualPlaceholdersPass # noqa
111111
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
112112
from .fuse_view_copy_transform_pass import FuseViewCopyTransformPass # noqa
113+
from .insert_const_shapes import InsertConstShapesPass # noqa
113114
from .insert_int32_casts_after_int64_placeholders import ( # noqa
114115
InsertInt32CastsAfterInt64PlaceholdersPass,
115116
)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
FuseEqualPlaceholdersPass,
103103
FuseQuantizedActivationPass,
104104
FuseViewCopyTransformPass,
105+
InsertConstShapesPass,
105106
InsertControlFlowRescalesPass,
106107
InsertInt32CastsAfterInt64PlaceholdersPass,
107108
InsertRescaleInt32Pass,
@@ -380,6 +381,7 @@ def _tosa_pipeline(
380381
RewriteMatmulPass(),
381382
RewritePadPass(),
382383
RewriteSlicePass(),
384+
InsertConstShapesPass(),
383385
]
384386
)
385387

0 commit comments

Comments
 (0)