Skip to content

Commit 73b4a69

Browse files
author
ssjia
committed
Update on "[ET-VK][ez] Implement helper functions to get fastest moving dim"
Add C++ and GLSL helpers to query the fastest moving dimension (the dimension with stride 1 in buffer layout). This is useful for optimizing memory access patterns in shaders, as iterating along the fastest moving dimension maximizes cache locality. The C++ `fastest_whcn_dim()` method accounts for block-transposed layouts by returning `outer_packed_dim` instead of `packed_dim` when applicable. A corresponding GLSL macro extracts this info from the hashed layout. Differential Revision: [D92061369](https://our.internmc.facebook.com/intern/diff/D92061369/) [ghstack-poisoned]
2 parents 66bf53e + 76a8ba4 commit 73b4a69

150 files changed

Lines changed: 4249 additions & 1144 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 43 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ download_ai_lite_core() {
8686
}
8787

8888
install_devicefarm_cli() {
89-
local cli_version="${1:-beta-1.0.8}"
89+
local cli_version="${1:-beta-1.0.9}"
9090
local cli_out="/tmp/devicefarm-cli-v${cli_version}.zip"
9191
local cli_dir="/tmp/devicefarm_cli"
9292

@@ -100,93 +100,59 @@ install_devicefarm_cli() {
100100
chmod +x "${cli_dir}/devicefarm-cli"
101101
}
102102

103-
reserve_if_needed() {
104-
# Set default value
103+
Enqueue_device_request() {
105104
export DEVICE_RESERVED=0
106-
107105
if ! command -v devicefarm-cli >/dev/null 2>&1; then
108106
echo "[WARN] devicefarm-cli is not installed." >&2
109107
return 1
110108
fi
111109

112-
local raw_info info_lines
113-
raw_info="$(devicefarm-cli -I)"
114-
115-
info_lines="$(printf '%s\n' "$raw_info" | grep -v '^\\[INFO\\]')"
116-
117-
local found_count
118-
119-
found_count=$(printf '%s\n' "$info_lines" \
120-
| grep -Eo 'Found available reservations *: *[0-9]+' \
121-
| grep -Eo '[0-9]+')
122-
[[ -z "$found_count" ]] && found_count=0
123-
124-
echo "[INFO] Current Reserved Count: $found_count"
125-
126-
local THRESHOLD_SECONDS=12600
127-
local any_below_threshold=0
128-
129-
if (( found_count > 0 )); then
130-
local table_body
131-
table_body=$(printf '%s\n' "$info_lines" | sed -n '2,$p')
132-
133-
while IFS= read -r line; do
134-
if [[ "$line" =~ ^[0-9]+[[:space:]]+([0-9]{1,2}:[0-9]{2}:[0-9]{2}) ]]; then
135-
local time_str="${BASH_REMATCH[1]}"
136-
IFS=: read -r hh mm ss <<<"$time_str"
137-
(( seconds = 10#$hh * 3600 + 10#$mm * 60 + 10#$ss ))
138-
if (( seconds <= THRESHOLD_SECONDS )); then
139-
any_below_threshold=1
140-
break
141-
fi
142-
fi
143-
done <<<"$table_body"
144-
else
145-
any_below_threshold=1
146-
fi
147-
148-
if (( any_below_threshold )); then
149-
echo "[INFO] Reserving now."
150-
if ! devicefarm-cli -R; then
151-
echo "::warning::Failed to reserve a device. No devices are currently available." >&2
152-
echo "[WARN] Device reservation failed - continuing without device." >&2
153-
return 0
154-
fi
155-
else
156-
echo "[INFO] Don't need to be reserved."
110+
echo "[INFO] Enqueue request (-Q)..."
111+
# Enqueue device request
112+
if ! devicefarm-cli -Q; then
113+
echo "::warning::Failed to enqueue device request (-Q)." >&2
114+
echo "[WARN] Device queue registration failed - continuing without device." >&2
115+
return 0
157116
fi
158117

159-
local info_after reservation_id max_seconds=0 max_id
118+
local interval_sec=60
119+
local out status
160120

161-
info_after="$(devicefarm-cli -I)"
121+
echo "[INFO] Polling assignment status (-C) every ${interval_sec}s..."
162122

163-
local body_after
164-
body_after=$(printf '%s\n' "$info_after" | grep -v '^\\[INFO\\]' | sed -n '2,$p')
123+
while true; do
124+
out="$(devicefarm-cli -C 2>&1)"
165125

166-
while IFS= read -r line; do
167-
if [[ "$line" =~ ^[0-9]+[[:space:]]+([0-9]{1,2}:[0-9]{2}:[0-9]{2})[[:space:]].*([0-9a-f-]{36})$ ]]; then
168-
local time_str="${BASH_REMATCH[1]}"
169-
local id="${BASH_REMATCH[2]}"
170-
IFS=: read -r hh mm ss <<<"$time_str"
171-
(( seconds = 10#$hh * 3600 + 10#$mm * 60 + 10#$ss ))
172-
if (( seconds > max_seconds )); then
173-
max_seconds=$seconds
174-
max_id=$id
175-
fi
126+
# Determine status: assigned / waiting / unknown
127+
if printf '%s' "$out" | grep -qiE 'waiting|not[[:space:]-]*assigned'; then
128+
status="waiting"
129+
elif printf '%s' "$out" | grep -qi 'assigned'; then
130+
status="assigned"
131+
else
132+
status="unknown"
176133
fi
177-
done <<<"$body_after"
178-
179-
reservation_id=$max_id
180134

181-
if [[ -n "$reservation_id" ]]; then
182-
devicefarm-cli -C "$reservation_id"
183-
devicefarm-cli -E "ls /"
184-
export DEVICE_RESERVED=1
185-
echo "[INFO] Device successfully reserved and connected."
186-
else
187-
echo "::warning::No available devices found." >&2
188-
echo "[WARN] There is no available devices."
189-
fi
135+
case "$status" in
136+
assigned)
137+
echo "[INFO] Device assigned."
138+
echo "$out"
139+
# Execute test command
140+
devicefarm-cli -E "ls /" || true
141+
export DEVICE_RESERVED=1
142+
echo "[INFO] Device successfully assigned and connected."
143+
return 0
144+
;;
145+
waiting)
146+
echo "[INFO] Status: $status"
147+
sleep "$interval_sec"
148+
;;
149+
*)
150+
echo "[WARN] Unknown status from -C. Output:"
151+
echo "$out"
152+
return 0
153+
;;
154+
esac
155+
done
190156
}
191157

192158
install_enn_backend() {
@@ -208,9 +174,9 @@ install_enn_backend() {
208174
}
209175

210176
litecore_ver="1.0"
211-
devicefarm_ver="beta-1.0.8"
177+
devicefarm_ver="beta-1.0.9"
212178

213179
download_ai_lite_core ${litecore_ver}
214180
install_devicefarm_cli "${devicefarm_ver}"
215181
install_enn_backend
216-
reserve_if_needed
182+
Enqueue_device_request

.ci/scripts/unittest-linux.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
2020
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
2121
source .ci/scripts/setup-vulkan-linux-deps.sh
2222

23+
# Enable sanitizers for Debug builds
24+
if [[ "$BUILD_MODE" == "Debug" ]]; then
25+
export EXECUTORCH_USE_SANITIZER=ON
26+
fi
27+
2328
# We need the runner to test the built library.
2429
PYTHON_EXECUTABLE=python \
2530
CMAKE_ARGS="-DEXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL=ON -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON -DEXECUTORCH_BUILD_TESTS=ON" \

.ci/scripts/unittest-macos.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ export TMP_DIR=$(mktemp -d)
1919
export PATH="${TMP_DIR}:$PATH"
2020
trap 'rm -rfv ${TMP_DIR}' EXIT
2121

22+
# Enable sanitizers for Debug builds
23+
if [[ "$BUILD_MODE" == "Debug" ]]; then
24+
export EXECUTORCH_USE_SANITIZER=ON
25+
fi
26+
2227
# Setup MacOS dependencies as there is no Docker support on MacOS atm
2328
# We need the runner to test the built library.
2429
PYTHON_EXECUTABLE=python \

.ci/scripts/utils.sh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,18 @@ build_executorch_runner_cmake() {
164164
clean_executorch_install_folders
165165
mkdir "${CMAKE_OUTPUT_DIR}"
166166

167-
if [[ $1 == "Debug" ]]; then
168-
CXXFLAGS="-fsanitize=address,undefined"
169-
else
170-
CXXFLAGS=""
167+
local build_type="${1:-Release}"
168+
local sanitizer_flag=""
169+
170+
if [[ "${EXECUTORCH_USE_SANITIZER:-OFF}" == "ON" ]]; then
171+
sanitizer_flag="-DEXECUTORCH_USE_SANITIZER=ON"
171172
fi
172-
CXXFLAGS="$CXXFLAGS" retry cmake \
173+
174+
retry cmake \
173175
-DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
174-
-DCMAKE_BUILD_TYPE="${1:-Release}" \
176+
-DCMAKE_BUILD_TYPE="${build_type}" \
177+
${sanitizer_flag} \
178+
${CMAKE_ARGS:-} \
175179
-B${CMAKE_OUTPUT_DIR} .
176180

177181
if [ "$(uname)" == "Darwin" ]; then

.github/workflows/pull.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -953,8 +953,6 @@ jobs:
953953
954954
test-samsung-quantmodels-linux:
955955
name: test-samsung-quantmodels-linux
956-
# Temporarily disabled - api key invalid
957-
if: false
958956
# if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
959957
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
960958
permissions:
@@ -996,9 +994,6 @@ jobs:
996994
997995
test-samsung-models-linux:
998996
name: test-samsung-models-linux
999-
# Temporarily disabled - edb (Exynos device bridge) failures and device reservation issues
1000-
# See: https://github.com/pytorch/executorch/issues/16678
1001-
if: false
1002997
# if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
1003998
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1004999
permissions:

.lintrunner.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,12 @@ command = [
506506

507507
[[linter]]
508508
code = 'DOCFORMATTER'
509-
include_patterns = []
510-
exclude_patterns = ['**']
509+
include_patterns = [
510+
'backends/arm/vgf/**/*.py',
511+
'backends/arm/tosa/**/*.py',
512+
'backends/arm/ethosu/**/*.py',
513+
]
514+
exclude_patterns = ['third-party/**', '**/third-party/**']
511515
command = [
512516
'python','-m','lintrunner_adapters','run','docformatter_linter','--config=pyproject.toml','--','@{{PATHSFILE}}'
513517
]

CMakeLists.txt

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,43 @@ project(executorch)
5252

5353
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
5454

55+
# --- ExecuTorch Version ---
56+
# Parse version from version.txt (single source of truth)
57+
file(READ "${EXECUTORCH_ROOT}/version.txt" ET_VERSION_STRING)
58+
string(STRIP "${ET_VERSION_STRING}" ET_VERSION_STRING)
59+
60+
# Extract major.minor.patch (handles formats like "1.2.0a0")
61+
string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)" ET_VERSION_MATCH
62+
"${ET_VERSION_STRING}"
63+
)
64+
set(ET_VERSION_MAJOR "${CMAKE_MATCH_1}")
65+
set(ET_VERSION_MINOR "${CMAKE_MATCH_2}")
66+
set(ET_VERSION_PATCH "${CMAKE_MATCH_3}")
67+
68+
# Validate that version components were successfully extracted
69+
if("${ET_VERSION_MAJOR}" STREQUAL ""
70+
OR "${ET_VERSION_MINOR}" STREQUAL ""
71+
OR "${ET_VERSION_PATCH}" STREQUAL ""
72+
)
73+
message(
74+
FATAL_ERROR
75+
"Failed to parse version from ${EXECUTORCH_ROOT}/version.txt. "
76+
"Expected format: MAJOR.MINOR.PATCH (e.g., '1.2.0' or '1.2.0a0'), "
77+
"but got: '${ET_VERSION_STRING}'"
78+
)
79+
endif()
80+
81+
message(
82+
STATUS
83+
"ExecuTorch version: ${ET_VERSION_MAJOR}.${ET_VERSION_MINOR}.${ET_VERSION_PATCH}"
84+
)
85+
86+
# Generate version.h from template
87+
configure_file(
88+
"${EXECUTORCH_ROOT}/runtime/core/version.h.in"
89+
"${CMAKE_CURRENT_BINARY_DIR}/include/executorch/runtime/core/version.h" @ONLY
90+
)
91+
5592
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
5693
include(${PROJECT_SOURCE_DIR}/tools/cmake/Codegen.cmake)
5794
include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
@@ -74,6 +111,34 @@ if(NOT CMAKE_BUILD_TYPE)
74111
endif()
75112
announce_configured_options(CMAKE_BUILD_TYPE)
76113

114+
# Sanitizer support (ASAN + UBSAN)
115+
if(EXECUTORCH_USE_SANITIZER)
116+
if(MSVC)
117+
message(WARNING "Sanitizers are not fully supported on MSVC, skipping")
118+
else()
119+
set(EXECUTORCH_SANITIZER_FLAGS
120+
"-fsanitize=address,undefined -fno-omit-frame-pointer"
121+
)
122+
# Suppress deprecation warnings on macOS (third-party code like flatcc uses
123+
# deprecated sprintf)
124+
if(APPLE)
125+
set(EXECUTORCH_SANITIZER_FLAGS
126+
"${EXECUTORCH_SANITIZER_FLAGS} -Wno-deprecated-declarations"
127+
)
128+
endif()
129+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXECUTORCH_SANITIZER_FLAGS}")
130+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXECUTORCH_SANITIZER_FLAGS}")
131+
set(CMAKE_EXE_LINKER_FLAGS
132+
"${CMAKE_EXE_LINKER_FLAGS} ${EXECUTORCH_SANITIZER_FLAGS}"
133+
)
134+
set(CMAKE_SHARED_LINKER_FLAGS
135+
"${CMAKE_SHARED_LINKER_FLAGS} ${EXECUTORCH_SANITIZER_FLAGS}"
136+
)
137+
message(STATUS "Sanitizers enabled: address, undefined")
138+
endif()
139+
endif()
140+
announce_configured_options(EXECUTORCH_USE_SANITIZER)
141+
77142
if(NOT PYTHON_EXECUTABLE)
78143
resolve_python_executable()
79144
endif()
@@ -310,6 +375,7 @@ endif()
310375
set(_common_include_directories
311376
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
312377
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/runtime/core/portable_type/c10>
378+
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
313379
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
314380
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/core/portable_type/c10>
315381
)
@@ -497,6 +563,11 @@ install(
497563
FILES_MATCHING
498564
PATTERN "*.h"
499565
)
566+
# Install generated version.h
567+
install(
568+
FILES "${CMAKE_CURRENT_BINARY_DIR}/include/executorch/runtime/core/version.h"
569+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/core
570+
)
500571
install(
501572
DIRECTORY runtime/executor/
502573
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/executor

backends/arm/_passes/decompose_gelu_pass.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Arm Limited and/or its affiliates.
1+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -123,7 +123,10 @@ def call_operator(self, op, args, kwargs, meta):
123123
if approximate == "none":
124124
# Constant mirrors ExecuTorch implementation for parity.
125125
FULL_SQRT1_2 = super().call_operator(
126-
full_op, ([1] * len(shape), 0.70710678118654752440), {}, meta
126+
full_op,
127+
([1] * len(shape), 0.70710678118654752440),
128+
{"dtype": dtype},
129+
meta,
127130
)
128131

129132
op1 = super().call_operator(mul_op, (input, FULL_SQRT1_2), {}, meta)

backends/arm/_passes/decompose_quant_nodes.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2025 Arm Limited and/or its affiliates.
1+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -62,6 +62,11 @@ def call(self, graph_module: torch.fx.GraphModule):
6262
args = node.args
6363
input_rank = args[0].meta["val"].ndim
6464
x, scale, zero_point, qmin, qmax, dtype = args
65+
input_dtype = x.meta["val"].dtype
66+
output_dtype = node.meta["val"].dtype
67+
fp_dtype = (
68+
output_dtype if node.target == DEQUANT_PER_TENSOR_OP else input_dtype
69+
)
6570
# Instead of dividing by scale in quantization, we multiply by 1/scale
6671
# when quantizing.
6772
scale = cast(float, scale)
@@ -71,15 +76,15 @@ def call(self, graph_module: torch.fx.GraphModule):
7176
graph_module.graph,
7277
exir_ops.edge.aten.full.default,
7378
args=((1,) * input_rank, scale),
74-
kwargs={"dtype": torch.float32},
79+
kwargs={"dtype": fp_dtype},
7580
)
7681
zp_const = create_node(
7782
graph_module.graph,
7883
exir_ops.edge.aten.full.default,
7984
args=((1,) * input_rank, zero_point),
8085
kwargs={
8186
"dtype": (
82-
torch.float32
87+
fp_dtype
8388
if node.target == QUANT_PER_TENSOR_OP
8489
else torch.int32
8590
)
@@ -137,7 +142,7 @@ def call(self, graph_module: torch.fx.GraphModule):
137142
graph_module.graph,
138143
exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
139144
args=(shifted,),
140-
kwargs={"dtype": torch.float32},
145+
kwargs={"dtype": fp_dtype},
141146
from_node=node,
142147
)
143148
dequantized = create_node(

0 commit comments

Comments
 (0)