Skip to content

Commit 9348eaf

Browse files
committed
Code Review
1 parent 5bf8132 commit 9348eaf

3 files changed

Lines changed: 39 additions & 44 deletions

File tree

backends/qualcomm/export_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ class QnnConfig:
118118
ci: Optional[bool] = False
119119
seed: Optional[int] = None
120120
htp_performance_mode: QnnExecuTorchHtpPerformanceMode = (
121-
QnnExecuTorchHtpPerformanceMode.kHtpBurst,
121+
QnnExecuTorchHtpPerformanceMode.kHtpBurst
122122
)
123123

124124
def __post_init__(self):

docs/source/tutorials_source/devtools-debugging-tutorial.py

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@
197197
# etdump_path=etdump_path,
198198
# etrecord=etrecord_path,
199199
# debug_buffer_path=debug_buffer_path,
200+
# # reference_graph_name defaults to EDGE_DIALECT_GRAPH_KEY; override when
201+
# # you want to use a different graph (e.g. a post-lowering graph key) as
202+
# # the reference for debug handle mapping.
200203
# )
201204
#
202205
# pd.set_option("display.width", 100000)
@@ -208,25 +211,27 @@
208211
#
209212
# The returned DataFrame contains columns for each operator, including:
210213
#
214+
# - ``aot_debug_handle``: The debug handle tuple identifying the AOT operator(s)
211215
# - ``aot_ops``: The operators in the eager model graph
212216
# - ``aot_intermediate_output``: Intermediate outputs from eager model
213-
# - ``runtime_ops``: The operators executed at runtime (may show DELEGATE_CALL for delegated ops)
217+
# - ``runtime_ops``: The kernel-level operators executed at runtime
218+
# - ``runtime_debug_handle``: The debug handle tuple from the runtime
214219
# - ``runtime_intermediate_output``: Intermediate outputs from runtime
215-
# - ``gap``: The numerical gap (MSE) between eager and runtime outputs
220+
# - ``gap``: The numerical gap (MSE) between eager and runtime outputs; ``nan`` when shapes differ
216221
# - ``stacktraces``: A dictionary mapping each operator name to its source code stack trace
217222
#
218223
# Example output:
219224
#
220225
# .. code-block:: text
221226
#
222-
# | | aot_ops | aot_intermediate_output | runtime_ops | runtime_intermediate_output | gap | stacktraces |
223-
# |----|----------------------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------| ---------------------------|----------------------------------------------------|
224-
# | 0 | [conv2d] | [[[tensor([-0.0130, 0.0075, -0.0334, -0.0122,... | [DELEGATE_CALL] | [[[tensor([-0.0130, 0.0075, -0.0334, -0.0122,... | [3.2530690555343034e-15] | {'conv2d': 'File "model.py", line 10...'} |
225-
# | 1 | [permute, cat, add, dropout] | [[[tensor(-0.0024), tensor(0.0054), tensor(0.0... | [DELEGATE_CALL] | [[[tensor(-0.0024), tensor(0.0054), tensor(0.0... | [3.2488685838924244e-15] | {'permute': 'File "model.py", line 15...', ...} |
226-
# ...
227-
# | 4 | [transpose, linear, unflatten, unsqueeze, tran...] | [[[tensor(0.0045), tensor(-0.0084), tensor(0.0... | [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ...] | [[tensor(0.0045), tensor(-0.0084), tensor(0.00... | [0.00010033142876115867] | {'transpose': 'File "model.py", line 20...', ...} |
228-
# ...
229-
# | 59 | [transpose_66, linear_44, unflatten_11, unsque...] | [[[tensor(-0.3346), tensor(0.1540), tensor(-0.... | [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ...] | [[tensor(-0.3346), tensor(0.1540), tensor(-0.0... | [0.02629170972698486] | {'transpose_66': 'File "model.py", line 50...'... |
227+
# | | aot_debug_handle | aot_ops | aot_intermediate_output | runtime_ops | runtime_debug_handle | runtime_intermediate_output | gap | stacktraces |
228+
# |----|-----------------|-------------------------------------------|---------------------------------------------------|--------------------------------------|---------------------|------------------------------------------------|--------------------------|------------------------------------------------|
229+
# | 0 | (4,) | [conv2d] | [[[tensor([-0.0130, 0.0075, -0.0334,... | [native_call_convolution_out] | (4,) | [[[tensor([-0.0130, 0.0075, -0.0334,... | [3.2530690555343034e-15] | {'conv2d': 'File "vit.py", line 10...'} |
230+
# | 1 | (11,) | [permute, cat, add, dropout] | [[[tensor(-0.0024), tensor(0.0054),... | [native_call_permute_copy_out] | (11,) | [[[tensor(-0.0024), tensor(0.0054),... | [3.2488685838924244e-15] | {'permute': 'File "vit.py", line 15...', ...} |
231+
# | ...| | | | | | | | |
232+
# | 4 | (62,) | [linear, unflatten, unsqueeze, transp...] | [[[tensor(0.0045), tensor(-0.0084),... | [native_call_expand_copy.out] | (62,) | [[[tensor([0.5541, 0.0014, 0.0015,... | [nan] | {'linear': 'File "vit.py", line 125...', ...} |
233+
# | ...| | | | | | | | |
234+
# | 37 | (164,) | [layer_norm_24] | [[[tensor(-0.9172), tensor(0.0853),... | [native_call_native_layer_norm.out] | (164,) | [[[tensor(-0.9172), tensor(0.0853),... | [2.2175176622973748e-11] | {'layer_norm_24': 'File "vit.py"...'} |
230235
#
231236
# The ``stacktraces`` column is particularly useful for tracing operators back to the
232237
# original PyTorch source code. Each entry is a dictionary where keys are operator names
@@ -274,34 +279,33 @@
274279
# .. code-block:: text
275280
#
276281
# Top 5 operators with largest numerical discrepancies:
277-
# aot_ops aot_intermediate_output runtime_ops runtime_intermediate_output gap stacktraces
278-
# 59 [transpose_66, linear_44, unflatten_11, unsque... [[[tensor(-0.3346), tensor(0.1540), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.3346), tensor(0.1540), tensor(-0.0... [0.02629170972698486] {'transpose_66': 'File "vit.py", line 125...'}
279-
# 24 [transpose_24, linear_16, unflatten_4, unsquee... [[[tensor(0.0344), tensor(-0.0583), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0344), tensor(-0.0583), tensor(-0.0... [0.010045093258604096] {'transpose_24': 'File "vit.py", line 125...'}
280-
# 29 [transpose_30, linear_20, unflatten_5, unsquee... [[[tensor(0.0457), tensor(0.0266), tensor(-0.0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0457), tensor(0.0266), tensor(-0.05... [0.008497326594593926] {'transpose_30': 'File "vit.py", line 125...'}
281-
# 34 [transpose_36, linear_24, unflatten_6, unsquee... [[[tensor(-0.1336), tensor(-0.0154), tensor(-0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.1336), tensor(-0.0154), tensor(-0.... [0.007672668965640913] {'transpose_36': 'File "vit.py", line 125...'}
282-
# 19 [transpose_18, linear_12, unflatten_3, unsquee... [[[tensor(-0.0801), tensor(0.0458), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.0801), tensor(0.0458), tensor(-0.0... [0.007446783635888463] {'transpose_18': 'File "vit.py", line 125...'}
283-
#
284-
# --- Operator 59 ---
285-
# Operators: ['transpose_66', 'linear_44', 'unflatten_11', 'unsqueeze_11', 'transpose_67']
286-
# Gap: [0.02629170972698486]
282+
# aot_debug_handle aot_ops aot_intermediate_output runtime_ops runtime_debug_handle runtime_intermediate_output gap stacktraces
283+
# 37 (164,) [layer_norm_24] [[[tensor(-0.9172), tensor(0.0853),... [native_call_native_layer_norm.out] (164,) [[[tensor(-0.9172), tensor(0.0853),... [2.2175176622973748e-11] {'layer_norm_24': 'File "vit.py"...'}
284+
# 33 (144,) [layer_norm_21] [[[tensor(-0.8958), tensor(-0.0307),... [native_call_native_layer_norm.out] (144,) [[[tensor(-0.8958), tensor(-0.0307),... [1.2286585568717539e-11] {'layer_norm_21': 'File "vit.py"...'}
285+
# 36 (157,) [layer_norm_23] [[[tensor(-0.8750), tensor(-0.0243),... [native_call_native_layer_norm.out] (157,) [[[tensor(-0.8750), tensor(-0.0243),... [1.2271681610366983e-11] {'layer_norm_23': 'File "vit.py"...'}
286+
# 30 (131,) [layer_norm_19] [[[tensor(-0.4218), tensor(-0.3333),... [native_call_native_layer_norm.out] (131,) [[[tensor(-0.4218), tensor(-0.3333),... [1.1904724456170941e-11] {'layer_norm_19': 'File "vit.py"...'}
287+
# 24 (105,) [layer_norm_15] [[[tensor(-0.2805), tensor(-0.3079),... [native_call_native_layer_norm.out] (105,) [[[tensor(-0.2805), tensor(-0.3079),... [1.1866889275499194e-11] {'layer_norm_15': 'File "vit.py"...'}
288+
#
289+
# --- Operator 37 ---
290+
# Operators: ['layer_norm_24']
291+
# Gap: [2.2175176622973748e-11]
287292
# Stack traces:
288-
# transpose_66:
289-
# File "torchvision/models/vision_transformer.py", line 125, in forward
290-
# x = self.self_attention(x)
293+
# layer_norm_24:
294+
# File "torchvision/models/vision_transformer.py", line 78, in forward
295+
# x = self.ln(x)
291296
# File "torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
292297
#
293298
# Operators with MSE > 0.0001:
294-
# aot_ops aot_intermediate_output runtime_ops runtime_intermediate_output gap stacktraces
295-
# 4 [transpose, linear, unflatten, unsqueeze, tran... [[[tensor(0.0045), tensor(-0.0084), tensor(0.0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0045), tensor(-0.0084), tensor(0.00... [0.00010033142876115867] {'transpose': 'File "vit.py", line 125...'}
296-
# 9 [transpose_6, linear_4, unflatten_1, unsqueeze... [[[tensor(0.0113), tensor(-0.0737), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0113), tensor(-0.0737), tensor(-0.0... [0.0005611182577030275] {'transpose_6': 'File "vit.py", line 125...'}
297-
# 14 [transpose_12, linear_8, unflatten_2, unsqueez... [[[tensor(-0.0476), tensor(-0.0941), tensor(-0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.0476), tensor(-0.0941), tensor(-0.... [0.004658652508649068] {'transpose_12': 'File "vit.py", line 125...'}
298-
# ...
299-
#
300-
# In this example, we can see that the attention layers (transpose + linear + unflatten patterns)
301-
# show the largest numerical discrepancies, which is expected behavior for delegated operators
302-
# using different precision. The ``stacktraces`` column shows that these operators originate from
303-
# ``self.self_attention(x)`` in the ViT model's forward method, helping you identify exactly
304-
# where in your model code the discrepancies arise.
299+
# Empty DataFrame
300+
# Columns: [aot_debug_handle, aot_ops, aot_intermediate_output, runtime_ops, runtime_debug_handle, runtime_intermediate_output, gap, stacktraces]
301+
# Index: []
302+
#
303+
# In this example, the largest numerical gaps come from layer norm operators (gaps ~1e-11),
304+
# which reflects floating-point rounding at float32 precision — well within acceptable tolerance.
305+
# Some attention-related operators (e.g. ``linear, unflatten, unsqueeze, transpose`` groups) show
306+
# ``nan`` gap: this occurs when the AOT op-group output shape does not match the shape of the
307+
# individual runtime kernel output that was captured for the same debug handle. No operators
308+
# exceed the 1e-4 threshold, confirming that XNNPACK float32 delegation is numerically accurate.
305309

306310
######################################################################
307311
# Pipeline 2: CMake Runtime

examples/qualcomm/util_scripts/qnn_intermediate_debugger_demo.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,6 @@ def main(args):
4747
# ensure the working directory exists.
4848
os.makedirs(args.artifact, exist_ok=True)
4949

50-
if not args.compile_only and args.device is None:
51-
raise RuntimeError(
52-
"device serial is required if not compile only. "
53-
"Please specify a device serial by -s/--device argument."
54-
)
55-
5650
data_num = 100
5751

5852
inputs, targets = get_imagenet_dataset(
@@ -81,9 +75,6 @@ def main(args):
8175
inputs = [inputs[0]]
8276
targets = [targets[0]]
8377

84-
if args.compile_only:
85-
return
86-
8778
# Please ensure that dump_intermediate_outputs are set to true when creating SimpleADB
8879
adb = SimpleADB(
8980
qnn_config=qnn_config,

0 commit comments

Comments
 (0)