Skip to content

Commit ed81943

Browse files
committed
Update on "[ET Device Support] DeviceAllocator interface and DeviceAllocatorRegistry"
This diff introduces the `DeviceAllocator` abstract interface and `DeviceAllocatorRegistry` for device-specific memory allocation. This is a foundational abstraction that enables the runtime to dispatch memory operations to the appropriate device backend other than CPU (CUDA, etc.). **DeviceAllocator interface provides:** - `init_buffer()` - Initialize memory buffer pools for memory-planned tensors - `get_offset_address()` - Get pointer to offset within pre-allocated buffer - `allocate()` / `deallocate()` - Dynamic device memory allocation - `copy_host_to_device()` / `copy_device_to_host()` - Data transfer between host and device - `device_type()` - Returns the device type this allocator handles **DeviceAllocatorRegistry provides:** - Singleton registry mapping DeviceType → DeviceAllocator - `register_allocator()` / `get_allocator()` methods - Fixed-size array indexed by device type (no dynamic allocation, embedded-friendly) **Design notes:** - Registry stores raw pointers (non-owning) - allocators are expected to be singletons with static lifetime - Follows ExecuTorch's embedded-first philosophy (no std::unique_ptr, no heap allocation in registry) - Convenience free functions `register_device_allocator()` and `get_device_allocator()` for ease of use Differential Revision: [D93635656](https://our.internmc.facebook.com/intern/diff/D93635656/) [ghstack-poisoned]
2 parents 76d7f72 + f9e07cc commit ed81943

481 files changed

Lines changed: 22504 additions & 16741 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
659af3c353e49b35c191cdd2dba3b3c79d0e6822
1+
release/2.11

.ci/docker/common/install_pytorch.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ install_pytorch_and_domains() {
3232
pip_install "$(echo dist/*.whl)"
3333

3434
# Grab the pinned audio and vision commits from PyTorch
35-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
35+
TORCHAUDIO_VERSION=release/2.11
3636
export TORCHAUDIO_VERSION
37-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
37+
TORCHVISION_VERSION=release/0.26
3838
export TORCHVISION_VERSION
3939

4040
install_domains

.ci/scripts/export_model_artifact.sh

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- nvidia/diar_streaming_sortformer_4spk-v2
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
2829
quant_name Quantization type (optional, default: non-quantized)
2930
Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
167168
PREPROCESSOR_FEATURE_SIZE=""
168169
PREPROCESSOR_OUTPUT=""
169170
;;
171+
facebook/dinov2-small-imagenet1k-1-layer)
172+
MODEL_NAME="dinov2"
173+
TASK=""
174+
MAX_SEQ_LEN=""
175+
EXTRA_PIP=""
176+
PREPROCESSOR_FEATURE_SIZE=""
177+
PREPROCESSOR_OUTPUT=""
178+
;;
170179
mistralai/Voxtral-Mini-4B-Realtime-2602)
171180
MODEL_NAME="voxtral_realtime"
172181
TASK=""
@@ -177,7 +186,7 @@ case "$HF_MODEL" in
177186
;;
178187
*)
179188
echo "Error: Unsupported model '$HF_MODEL'"
180-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
189+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
181190
exit 1
182191
;;
183192
esac
@@ -293,6 +302,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
293302
exit 0
294303
fi
295304

305+
# DINOv2 uses a custom export script
306+
if [ "$MODEL_NAME" = "dinov2" ]; then
307+
pip install -r examples/models/dinov2/install_requirements.txt
308+
309+
python -m executorch.examples.models.dinov2.export_dinov2 \
310+
--backend "$DEVICE" \
311+
--output-dir "${OUTPUT_DIR}"
312+
313+
test -f "${OUTPUT_DIR}/model.pte"
314+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
315+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
316+
fi
317+
ls -al "${OUTPUT_DIR}"
318+
echo "::endgroup::"
319+
exit 0
320+
fi
321+
296322
# Voxtral Realtime uses a custom export script
297323
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
298324
pip install safetensors huggingface_hub
@@ -308,6 +334,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
308334
VR_QUANT_ARGS="--qlinear-encoder 8da4w --qlinear 8da4w --qlinear-group-size 32 --qembedding 8w"
309335
elif [ "$QUANT_NAME" = "quantized-int4-metal" ]; then
310336
VR_QUANT_ARGS="--qlinear-encoder fpa4w --qlinear fpa4w"
337+
VR_DTYPE_ARGS="--dtype bf16"
311338
elif [ "$QUANT_NAME" = "quantized-int4-tile-packed" ]; then
312339
VR_QUANT_ARGS="--qlinear-encoder 4w --qlinear-encoder-packing-format tile_packed_to_4d --qlinear 4w --qlinear-packing-format tile_packed_to_4d --qembedding 8w"
313340
VR_DTYPE_ARGS="--dtype bf16"
@@ -347,6 +374,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
347374
fi
348375
# Copy tokenizer from downloaded model weights
349376
cp "$LOCAL_MODEL_DIR/tekken.json" "${OUTPUT_DIR}/tekken.json"
377+
rm -rf "$LOCAL_MODEL_DIR"
350378
ls -al "${OUTPUT_DIR}"
351379
echo "::endgroup::"
352380
exit 0

.ci/scripts/test_backend.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ if [[ "$FLOW" == *qnn* ]]; then
4646
export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
4747

4848
# TODO Get SDK root from install scripts
49-
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
49+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
5050
fi
5151

5252
if [[ "$FLOW" == *vulkan* ]]; then

.ci/scripts/test_lora.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ Okay, so I need to calculate 15% of 80."
139139
EXPECTED_QUANT_LORA_PREFIX="
140140
<|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
141141
To calculate 15% of 80, we can multiply 80 by 15/100.
142-
So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
142+
80 * 15/100 = 12.
143+
So, 15% of 80 is 12.
143144
#### 12
144145
The answer is: 12<|im_end|>"
145146

.ci/scripts/test_model_e2e.sh

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- Qwen/Qwen3-0.6B
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
- mistralai/Voxtral-Mini-4B-Realtime-2602
2829
2930
quant_name Quantization type (required)
@@ -190,6 +191,19 @@ case "$HF_MODEL" in
190191
AUDIO_FILE="poem.wav"
191192
IMAGE_PATH=""
192193
;;
194+
facebook/dinov2-small-imagenet1k-1-layer)
195+
MODEL_NAME="dinov2"
196+
RUNNER_TARGET="dinov2_runner"
197+
RUNNER_PATH="dinov2"
198+
EXPECTED_OUTPUT="Samoyed"
199+
PREPROCESSOR=""
200+
TOKENIZER_URL=""
201+
TOKENIZER_FILE=""
202+
AUDIO_URL=""
203+
AUDIO_FILE=""
204+
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
205+
IMAGE_PATH=""
206+
;;
193207
mistralai/Voxtral-Mini-4B-Realtime-2602)
194208
MODEL_NAME="voxtral_realtime"
195209
RUNNER_TARGET="voxtral_realtime_runner"
@@ -204,7 +218,7 @@ case "$HF_MODEL" in
204218
;;
205219
*)
206220
echo "Error: Unsupported model '$HF_MODEL'"
207-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
221+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
208222
exit 1
209223
;;
210224
esac
@@ -218,7 +232,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
218232

219233

220234
# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
221-
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ]; then
235+
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
222236
if [ "$TOKENIZER_FILE" != "" ]; then
223237
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
224238
else
@@ -234,10 +248,15 @@ if [ "$AUDIO_URL" != "" ]; then
234248
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
235249
conda install -y -c conda-forge "ffmpeg<8"
236250
pip install datasets soundfile
237-
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
251+
pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu
238252
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
239253
fi
240254

255+
# Download test image for vision models
256+
if [ -n "${IMAGE_URL:-}" ]; then
257+
curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
258+
fi
259+
241260
ls -al
242261
echo "::endgroup::"
243262

@@ -316,6 +335,12 @@ EOF
316335
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
317336
fi
318337
;;
338+
dinov2)
339+
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
340+
if [ "$DEVICE" = "cuda" ]; then
341+
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
342+
fi
343+
;;
319344
voxtral_realtime)
320345
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
321346
# Add CUDA data path if present

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ param(
1313
[Parameter(Mandatory = $true)]
1414
[string]$QuantName,
1515
[string]$ModelDir = ".",
16-
[string]$ExpectedCudaVersion = ""
16+
[string]$ExpectedCudaVersion = "",
17+
[string]$Mode = ""
1718
)
1819

1920
Set-StrictMode -Version Latest
@@ -25,6 +26,15 @@ if ($Device -ne "cuda-windows") {
2526
throw "Unsupported device '$Device'. Expected 'cuda-windows'."
2627
}
2728

29+
if ($Mode -ne "") {
30+
if ($Mode -notin @("vr-streaming", "vr-offline")) {
31+
throw "Unsupported mode '$Mode'. Supported modes: vr-streaming, vr-offline"
32+
}
33+
if ($HfModel -ne "mistralai/Voxtral-Mini-4B-Realtime-2602") {
34+
throw "Mode '$Mode' can only be used with Voxtral Realtime model"
35+
}
36+
}
37+
2838
Write-Host "Testing model: $HfModel (quantization: $QuantName)"
2939

3040
$resolvedModelDir = (Resolve-Path -Path $ModelDir).Path
@@ -79,15 +89,28 @@ switch ($HfModel) {
7989
$runnerTarget = "voxtral_realtime_runner"
8090
$runnerPath = "voxtral_realtime"
8191
$runnerPreset = "voxtral-realtime-cuda"
82-
$expectedOutput = "Loading audio from"
92+
$expectedOutput = "Quilter"
8393
$preprocessor = "preprocessor.pte"
8494
$tokenizerUrl = ""
8595
$tokenizerFile = "tekken.json"
8696
$audioUrl = "https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
8797
$audioFile = "poem.wav"
8898
}
99+
"facebook/dinov2-small-imagenet1k-1-layer" {
100+
$runnerTarget = "dinov2_runner"
101+
$runnerPath = "dinov2"
102+
$runnerPreset = "dinov2-cuda"
103+
$expectedOutput = "Samoyed"
104+
$preprocessor = ""
105+
$tokenizerUrl = ""
106+
$tokenizerFile = ""
107+
$audioUrl = ""
108+
$audioFile = ""
109+
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
110+
$imageFile = "test_image.jpg"
111+
}
89112
default {
90-
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
113+
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
91114
}
92115
}
93116

@@ -162,6 +185,9 @@ try {
162185
if ($audioUrl -ne "") {
163186
Download-IfNeeded -Url $audioUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $audioFile)
164187
}
188+
if ((Get-Variable -Name imageUrl -ErrorAction SilentlyContinue) -and $imageUrl -ne "") {
189+
Download-IfNeeded -Url $imageUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
190+
}
165191
Get-ChildItem -Path $resolvedModelDir
166192
Write-Host "::endgroup::"
167193

@@ -207,6 +233,16 @@ try {
207233
"--audio_path", (Join-Path -Path $resolvedModelDir -ChildPath $audioFile),
208234
"--preprocessor_path", (Join-Path -Path $resolvedModelDir -ChildPath $preprocessor)
209235
)
236+
if ($Mode -ne "vr-offline") {
237+
$runnerArgs += "--streaming"
238+
}
239+
}
240+
"facebook/dinov2-small-imagenet1k-1-layer" {
241+
$runnerArgs = @(
242+
"--model_path", $modelPte,
243+
"--data_path", $cudaBlob,
244+
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
245+
)
210246
}
211247
}
212248

.ci/scripts/test_wheel_package_qnn.sh

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,17 @@ print(module_vars["TORCH_VERSION"])
158158
PY
159159
)
160160

161-
NIGHTLY_VERSION=$(
162-
"$PYBIN" - <<'PY'
163-
import runpy
164-
module_vars = runpy.run_path("torch_pin.py")
165-
print(module_vars["NIGHTLY_VERSION"])
166-
PY
167-
)
168-
echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ==="
169-
170-
# Install torchao based on the pinned PyTorch version
171-
"$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu"
161+
# NIGHTLY_VERSION=$(
162+
# "$PYBIN" - <<'PY'
163+
# import runpy
164+
# module_vars = runpy.run_path("torch_pin.py")
165+
# print(module_vars["NIGHTLY_VERSION"])
166+
# PY
167+
# )
168+
echo "=== [$LABEL] Install torch==${TORCH_VERSION} ==="
169+
170+
# Install torch based on the pinned PyTorch version, preferring the PyTorch test index
171+
"$PIPBIN" install torch=="${TORCH_VERSION}" --extra-index-url "https://download.pytorch.org/whl/test"
172172
"$PIPBIN" install wheel
173173

174174
# Install torchao based on the pinned commit from third-party/ao submodule

.ci/scripts/unittest-linux-cmake.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ if ! python -c "import tosa_serializer" >/dev/null 2>&1; then
1919
TOSA_SERIALIZATION_DIR="${TOSA_TOOLS_DIR}/serialization"
2020
fi
2121

22+
# NOTE: Will be removed when tosa-tools is installed via pypi
23+
python -m pip install pybind11==2.10.4
2224
CMAKE_POLICY_VERSION_MINIMUM=3.5 BUILD_PYBIND=1 \
2325
python -m pip install --no-dependencies \
2426
"${TOSA_SERIALIZATION_DIR}"

.ci/scripts/utils.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ dedupe_macos_loader_path_rpaths() {
5353
pushd ..
5454
torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib
5555
popd
56-
56+
5757
if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then
5858
return
5959
fi
@@ -141,9 +141,9 @@ install_pytorch_and_domains() {
141141

142142
dedupe_macos_loader_path_rpaths
143143
# Grab the pinned audio and vision commits from PyTorch
144-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
144+
TORCHAUDIO_VERSION=release/2.11
145145
export TORCHAUDIO_VERSION
146-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
146+
TORCHVISION_VERSION=release/0.26
147147
export TORCHVISION_VERSION
148148

149149
install_domains

0 commit comments

Comments
 (0)