Skip to content

Commit e978df0

Browse files
committed
Update base for Update on "add parakeet into cuda benchmark ci"
As title. Differential Revision: [D92208958](https://our.internmc.facebook.com/intern/diff/D92208958/) [ghstack-poisoned]
2 parents 593775b + fdb386c commit e978df0

363 files changed

Lines changed: 19861 additions & 3564 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/scripts/export_model_artifact.sh

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8-
# Export model to CUDA/Metal format with optional quantization
8+
# Export model to CUDA/Metal/XNNPACK format with optional quantization
99

1010
show_help() {
1111
cat << EOF
1212
Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir]
1313
14-
Export a HuggingFace model to CUDA/Metal format with optional quantization.
14+
Export a HuggingFace model to CUDA/Metal/XNNPACK format with optional quantization.
1515
1616
Arguments:
17-
device cuda or metal (required)
17+
device cuda, metal, or xnnpack (required)
1818
1919
hf_model HuggingFace model ID (required)
2020
Supported models:
@@ -26,16 +26,20 @@ Arguments:
2626
quant_name Quantization type (optional, default: non-quantized)
2727
Options:
2828
- non-quantized
29-
- quantized-int4-tile-packed
30-
- quantized-int4-weight-only
29+
- quantized-int4-tile-packed (CUDA only)
30+
- quantized-int4-weight-only (CUDA only)
31+
- quantized-int4-metal (Metal only)
32+
- quantized-8da4w (XNNPACK only)
3133
3234
output_dir Output directory for artifacts (optional, default: current directory)
3335
3436
Examples:
3537
export_model_artifact.sh metal "openai/whisper-small"
38+
export_model_artifact.sh metal "nvidia/parakeet-tdt" "quantized-int4-metal"
3639
export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
3740
export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
3841
export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output"
42+
export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output"
3943
EOF
4044
}
4145

@@ -64,9 +68,11 @@ case "$DEVICE" in
6468
;;
6569
metal)
6670
;;
71+
xnnpack)
72+
;;
6773
*)
6874
echo "Error: Unsupported device '$DEVICE'"
69-
echo "Supported devices: cuda, cuda-windows, metal"
75+
echo "Supported devices: cuda, cuda-windows, metal, xnnpack"
7076
exit 1
7177
;;
7278
esac
@@ -127,21 +133,35 @@ case "$QUANT_NAME" in
127133
;;
128134
quantized-int4-tile-packed)
129135
if [ "$DEVICE" = "metal" ]; then
130-
echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
136+
echo "Error: Metal backend does not support quantization '$QUANT_NAME'"
131137
exit 1
132138
fi
133139
EXTRA_ARGS="--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
134140
;;
135141
quantized-int4-weight-only)
136142
if [ "$DEVICE" = "metal" ]; then
137-
echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
143+
echo "Error: Metal backend does not support quantization '$QUANT_NAME'"
138144
exit 1
139145
fi
140146
EXTRA_ARGS="--qlinear_encoder 4w"
141147
;;
148+
quantized-int4-metal)
149+
if [ "$DEVICE" != "metal" ]; then
150+
echo "Error: Quantization '$QUANT_NAME' only supported on Metal backend"
151+
exit 1
152+
fi
153+
EXTRA_ARGS="--qlinear fpa4w --qlinear_encoder fpa4w"
154+
;;
155+
quantized-8da4w)
156+
if [ "$DEVICE" != "xnnpack" ]; then
157+
echo "Error: quantized-8da4w is only supported with xnnpack device"
158+
exit 1
159+
fi
160+
EXTRA_ARGS="--qlinear 8da4w --qlinear_group_size 32 --qlinear_encoder 8da4w --qlinear_encoder_group_size 32"
161+
;;
142162
*)
143163
echo "Error: Unsupported quantization '$QUANT_NAME'"
144-
echo "Supported quantizations: non-quantized, quantized-int4-tile-packed, quantized-int4-weight-only"
164+
echo "Supported quantizations: non-quantized, quantized-int4-tile-packed, quantized-int4-weight-only, quantized-int4-metal, quantized-8da4w"
145165
exit 1
146166
;;
147167
esac
@@ -157,10 +177,17 @@ pip list
157177
if [ "$MODEL_NAME" = "parakeet" ]; then
158178
pip install -r examples/models/parakeet/install_requirements.txt
159179

180+
# Set dtype based on backend (XNNPACK uses fp32, CUDA/Metal use bf16)
181+
if [ "$DEVICE" = "xnnpack" ]; then
182+
DTYPE_ARG=""
183+
else
184+
DTYPE_ARG="--dtype bf16"
185+
fi
186+
160187
python -m executorch.examples.models.parakeet.export_parakeet_tdt \
161188
--backend "$DEVICE" \
162189
--output-dir "${OUTPUT_DIR}" \
163-
--dtype bf16 \
190+
${DTYPE_ARG} \
164191
${EXTRA_ARGS}
165192

166193
test -f "${OUTPUT_DIR}/model.pte"

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 43 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ download_ai_lite_core() {
8686
}
8787

8888
install_devicefarm_cli() {
89-
local cli_version="${1:-beta-1.0.8}"
89+
local cli_version="${1:-beta-1.0.9}"
9090
local cli_out="/tmp/devicefarm-cli-v${cli_version}.zip"
9191
local cli_dir="/tmp/devicefarm_cli"
9292

@@ -100,93 +100,59 @@ install_devicefarm_cli() {
100100
chmod +x "${cli_dir}/devicefarm-cli"
101101
}
102102

103-
reserve_if_needed() {
104-
# Set default value
103+
Enqueue_device_request() {
105104
export DEVICE_RESERVED=0
106-
107105
if ! command -v devicefarm-cli >/dev/null 2>&1; then
108106
echo "[WARN] devicefarm-cli is not installed." >&2
109107
return 1
110108
fi
111109

112-
local raw_info info_lines
113-
raw_info="$(devicefarm-cli -I)"
114-
115-
info_lines="$(printf '%s\n' "$raw_info" | grep -v '^\\[INFO\\]')"
116-
117-
local found_count
118-
119-
found_count=$(printf '%s\n' "$info_lines" \
120-
| grep -Eo 'Found available reservations *: *[0-9]+' \
121-
| grep -Eo '[0-9]+')
122-
[[ -z "$found_count" ]] && found_count=0
123-
124-
echo "[INFO] Current Reserved Count: $found_count"
125-
126-
local THRESHOLD_SECONDS=12600
127-
local any_below_threshold=0
128-
129-
if (( found_count > 0 )); then
130-
local table_body
131-
table_body=$(printf '%s\n' "$info_lines" | sed -n '2,$p')
132-
133-
while IFS= read -r line; do
134-
if [[ "$line" =~ ^[0-9]+[[:space:]]+([0-9]{1,2}:[0-9]{2}:[0-9]{2}) ]]; then
135-
local time_str="${BASH_REMATCH[1]}"
136-
IFS=: read -r hh mm ss <<<"$time_str"
137-
(( seconds = 10#$hh * 3600 + 10#$mm * 60 + 10#$ss ))
138-
if (( seconds <= THRESHOLD_SECONDS )); then
139-
any_below_threshold=1
140-
break
141-
fi
142-
fi
143-
done <<<"$table_body"
144-
else
145-
any_below_threshold=1
146-
fi
147-
148-
if (( any_below_threshold )); then
149-
echo "[INFO] Reserving now."
150-
if ! devicefarm-cli -R; then
151-
echo "::warning::Failed to reserve a device. No devices are currently available." >&2
152-
echo "[WARN] Device reservation failed - continuing without device." >&2
153-
return 0
154-
fi
155-
else
156-
echo "[INFO] Don't need to be reserved."
110+
echo "[INFO] Enqueue request (-Q)..."
111+
# Enqueue device request
112+
if ! devicefarm-cli -Q; then
113+
echo "::warning::Failed to enqueue device request (-Q)." >&2
114+
echo "[WARN] Device queue registration failed - continuing without device." >&2
115+
return 0
157116
fi
158117

159-
local info_after reservation_id max_seconds=0 max_id
118+
local interval_sec=60
119+
local out status
160120

161-
info_after="$(devicefarm-cli -I)"
121+
echo "[INFO] Polling assignment status (-C) every ${interval_sec}s..."
162122

163-
local body_after
164-
body_after=$(printf '%s\n' "$info_after" | grep -v '^\\[INFO\\]' | sed -n '2,$p')
123+
while true; do
124+
out="$(devicefarm-cli -C 2>&1)"
165125

166-
while IFS= read -r line; do
167-
if [[ "$line" =~ ^[0-9]+[[:space:]]+([0-9]{1,2}:[0-9]{2}:[0-9]{2})[[:space:]].*([0-9a-f-]{36})$ ]]; then
168-
local time_str="${BASH_REMATCH[1]}"
169-
local id="${BASH_REMATCH[2]}"
170-
IFS=: read -r hh mm ss <<<"$time_str"
171-
(( seconds = 10#$hh * 3600 + 10#$mm * 60 + 10#$ss ))
172-
if (( seconds > max_seconds )); then
173-
max_seconds=$seconds
174-
max_id=$id
175-
fi
126+
# Determine status: assigned / waiting / unknown
127+
if printf '%s' "$out" | grep -qiE 'waiting|not[[:space:]-]*assigned'; then
128+
status="waiting"
129+
elif printf '%s' "$out" | grep -qi 'assigned'; then
130+
status="assigned"
131+
else
132+
status="unknown"
176133
fi
177-
done <<<"$body_after"
178-
179-
reservation_id=$max_id
180134

181-
if [[ -n "$reservation_id" ]]; then
182-
devicefarm-cli -C "$reservation_id"
183-
devicefarm-cli -E "ls /"
184-
export DEVICE_RESERVED=1
185-
echo "[INFO] Device successfully reserved and connected."
186-
else
187-
echo "::warning::No available devices found." >&2
188-
echo "[WARN] There is no available devices."
189-
fi
135+
case "$status" in
136+
assigned)
137+
echo "[INFO] Device assigned."
138+
echo "$out"
139+
# Execute test command
140+
devicefarm-cli -E "ls /" || true
141+
export DEVICE_RESERVED=1
142+
echo "[INFO] Device successfully assigned and connected."
143+
return 0
144+
;;
145+
waiting)
146+
echo "[INFO] Status: $status"
147+
sleep "$interval_sec"
148+
;;
149+
*)
150+
echo "[WARN] Unknown status from -C. Output:"
151+
echo "$out"
152+
return 0
153+
;;
154+
esac
155+
done
190156
}
191157

192158
install_enn_backend() {
@@ -208,9 +174,9 @@ install_enn_backend() {
208174
}
209175

210176
litecore_ver="1.0"
211-
devicefarm_ver="beta-1.0.8"
177+
devicefarm_ver="beta-1.0.9"
212178

213179
download_ai_lite_core ${litecore_ver}
214180
install_devicefarm_cli "${devicefarm_ver}"
215181
install_enn_backend
216-
reserve_if_needed
182+
Enqueue_device_request

.ci/scripts/test_model_e2e.sh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8-
# Test CUDA/Metal model end-to-end, need to run .ci/scripts/export_model_artifact.sh first
8+
# Test CUDA/Metal/XNNPACK model end-to-end, need to run .ci/scripts/export_model_artifact.sh first
99

1010
show_help() {
1111
cat << EOF
1212
Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir]
1313
14-
Build and run end-to-end tests for CUDA/Metal models.
14+
Build and run end-to-end tests for CUDA/Metal/XNNPACK models.
1515
1616
Arguments:
17-
device cuda or metal (required)
17+
device cuda, metal, or xnnpack (required)
1818
1919
hf_model HuggingFace model ID (required)
2020
Supported models:
@@ -28,6 +28,7 @@ Arguments:
2828
- non-quantized
2929
- quantized-int4-tile-packed
3030
- quantized-int4-weight-only
31+
- quantized-8da4w (XNNPACK only)
3132
3233
model_dir Directory containing model artifacts (optional, default: current directory)
3334
Expected files: model.pte, aoti_cuda_blob.ptd (CUDA only)
@@ -37,6 +38,7 @@ Examples:
3738
test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
3839
test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
3940
test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
41+
test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
4042
EOF
4143
}
4244

@@ -174,12 +176,17 @@ echo "::endgroup::"
174176

175177
echo "::group::Build $MODEL_NAME Runner"
176178

177-
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ]; then
178-
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda' or 'metal'."
179+
if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ] && [ "$DEVICE" != "xnnpack" ]; then
180+
echo "Error: Unsupported device '$DEVICE'. Must be 'cuda', 'metal', or 'xnnpack'."
179181
exit 1
180182
fi
181183

182-
MAKE_TARGET="${RUNNER_PATH}-${DEVICE}"
184+
# Map device to make target (xnnpack uses cpu target which includes XNNPACK)
185+
if [ "$DEVICE" = "xnnpack" ]; then
186+
MAKE_TARGET="${RUNNER_PATH}-cpu"
187+
else
188+
MAKE_TARGET="${RUNNER_PATH}-${DEVICE}"
189+
fi
183190
make "${MAKE_TARGET}"
184191
echo "::endgroup::"
185192

.ci/scripts/unittest-linux.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
2020
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
2121
source .ci/scripts/setup-vulkan-linux-deps.sh
2222

23+
# Enable sanitizers for Debug builds
24+
if [[ "$BUILD_MODE" == "Debug" ]]; then
25+
export EXECUTORCH_USE_SANITIZER=ON
26+
fi
27+
2328
# We need the runner to test the built library.
2429
PYTHON_EXECUTABLE=python \
2530
CMAKE_ARGS="-DEXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL=ON -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON -DEXECUTORCH_BUILD_TESTS=ON" \

.ci/scripts/unittest-macos.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ export TMP_DIR=$(mktemp -d)
1919
export PATH="${TMP_DIR}:$PATH"
2020
trap 'rm -rfv ${TMP_DIR}' EXIT
2121

22+
# Enable sanitizers for Debug builds
23+
if [[ "$BUILD_MODE" == "Debug" ]]; then
24+
export EXECUTORCH_USE_SANITIZER=ON
25+
fi
26+
2227
# Setup MacOS dependencies as there is no Docker support on MacOS atm
2328
# We need the runner to test the built library.
2429
PYTHON_EXECUTABLE=python \

.ci/scripts/utils.sh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,18 @@ build_executorch_runner_cmake() {
164164
clean_executorch_install_folders
165165
mkdir "${CMAKE_OUTPUT_DIR}"
166166

167-
if [[ $1 == "Debug" ]]; then
168-
CXXFLAGS="-fsanitize=address,undefined"
169-
else
170-
CXXFLAGS=""
167+
local build_type="${1:-Release}"
168+
local sanitizer_flag=""
169+
170+
if [[ "${EXECUTORCH_USE_SANITIZER:-OFF}" == "ON" ]]; then
171+
sanitizer_flag="-DEXECUTORCH_USE_SANITIZER=ON"
171172
fi
172-
CXXFLAGS="$CXXFLAGS" retry cmake \
173+
174+
retry cmake \
173175
-DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
174-
-DCMAKE_BUILD_TYPE="${1:-Release}" \
176+
-DCMAKE_BUILD_TYPE="${build_type}" \
177+
${sanitizer_flag} \
178+
${CMAKE_ARGS:-} \
175179
-B${CMAKE_OUTPUT_DIR} .
176180

177181
if [ "$(uname)" == "Darwin" ]; then

0 commit comments

Comments
 (0)