|
197 | 197 | # etdump_path=etdump_path, |
198 | 198 | # etrecord=etrecord_path, |
199 | 199 | # debug_buffer_path=debug_buffer_path, |
| 200 | +# # reference_graph_name defaults to EDGE_DIALECT_GRAPH_KEY; override when |
| 201 | +# # you want to use a different graph (e.g. a post-lowering graph key) as |
| 202 | +# # the reference for debug handle mapping. |
200 | 203 | # ) |
201 | 204 | # |
202 | 205 | # pd.set_option("display.width", 100000) |
|
208 | 211 | # |
209 | 212 | # The returned DataFrame contains columns for each operator including: |
210 | 213 | # |
| 214 | +# - ``aot_debug_handle``: The debug handle tuple identifying the AOT operator(s) |
211 | 215 | # - ``aot_ops``: The operators in the eager model graph |
212 | 216 | # - ``aot_intermediate_output``: Intermediate outputs from eager model |
213 | | -# - ``runtime_ops``: The operators executed at runtime (may show DELEGATE_CALL for delegated ops) |
| 217 | +# - ``runtime_ops``: The kernel-level operators executed at runtime |
| 218 | +# - ``runtime_debug_handle``: The debug handle tuple from the runtime |
214 | 219 | # - ``runtime_intermediate_output``: Intermediate outputs from runtime |
215 | | -# - ``gap``: The numerical gap (MSE) between eager and runtime outputs |
| 220 | +# - ``gap``: The numerical gap (MSE) between eager and runtime outputs; ``nan`` when shapes differ |
216 | 221 | # - ``stacktraces``: A dictionary mapping each operator name to its source code stack trace |
217 | 222 | # |
218 | 223 | # Example output: |
219 | 224 | # |
220 | 225 | # .. code-block:: text |
221 | 226 | # |
222 | | -# | | aot_ops | aot_intermediate_output | runtime_ops | runtime_intermediate_output | gap | stacktraces | |
223 | | -# |----|----------------------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------| ---------------------------|----------------------------------------------------| |
224 | | -# | 0 | [conv2d] | [[[tensor([-0.0130, 0.0075, -0.0334, -0.0122,... | [DELEGATE_CALL] | [[[tensor([-0.0130, 0.0075, -0.0334, -0.0122,... | [3.2530690555343034e-15] | {'conv2d': 'File "model.py", line 10...'} | |
225 | | -# | 1 | [permute, cat, add, dropout] | [[[tensor(-0.0024), tensor(0.0054), tensor(0.0... | [DELEGATE_CALL] | [[[tensor(-0.0024), tensor(0.0054), tensor(0.0... | [3.2488685838924244e-15] | {'permute': 'File "model.py", line 15...', ...} | |
226 | | -# ... |
227 | | -# | 4 | [transpose, linear, unflatten, unsqueeze, tran...] | [[[tensor(0.0045), tensor(-0.0084), tensor(0.0... | [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ...] | [[tensor(0.0045), tensor(-0.0084), tensor(0.00... | [0.00010033142876115867] | {'transpose': 'File "model.py", line 20...', ...} | |
228 | | -# ... |
229 | | -# | 59 | [transpose_66, linear_44, unflatten_11, unsque...] | [[[tensor(-0.3346), tensor(0.1540), tensor(-0.... | [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ...] | [[tensor(-0.3346), tensor(0.1540), tensor(-0.0... | [0.02629170972698486] | {'transpose_66': 'File "model.py", line 50...'... | |
| 227 | +# | | aot_debug_handle | aot_ops | aot_intermediate_output | runtime_ops | runtime_debug_handle | runtime_intermediate_output | gap | stacktraces | |
| 228 | +# |----|-----------------|-------------------------------------------|---------------------------------------------------|--------------------------------------|---------------------|------------------------------------------------|--------------------------|------------------------------------------------| |
| 229 | +# | 0 | (4,)            | [conv2d]                                  | [[[tensor([-0.0130, 0.0075, -0.0334,...           | [native_call_convolution.out]        | (4,)                | [[[tensor([-0.0130, 0.0075, -0.0334,...        | [3.2530690555343034e-15] | {'conv2d': 'File "vit.py", line 10...'}        |
| 230 | +# | 1 | (11,)           | [permute, cat, add, dropout]              | [[[tensor(-0.0024), tensor(0.0054),...            | [native_call_permute_copy.out]       | (11,)               | [[[tensor(-0.0024), tensor(0.0054),...         | [3.2488685838924244e-15] | {'permute': 'File "vit.py", line 15...', ...}  |
| 231 | +# | ...| | | | | | | | | |
| 232 | +# | 4 | (62,) | [linear, unflatten, unsqueeze, transp...] | [[[tensor(0.0045), tensor(-0.0084),... | [native_call_expand_copy.out] | (62,) | [[[tensor([0.5541, 0.0014, 0.0015,... | [nan] | {'linear': 'File "vit.py", line 125...', ...} | |
| 233 | +# | ...| | | | | | | | | |
| 234 | +# | 37 | (164,) | [layer_norm_24] | [[[tensor(-0.9172), tensor(0.0853),... | [native_call_native_layer_norm.out] | (164,) | [[[tensor(-0.9172), tensor(0.0853),... | [2.2175176622973748e-11] | {'layer_norm_24': 'File "vit.py"...'} | |
230 | 235 | # |
231 | 236 | # The ``stacktraces`` column is particularly useful for tracing operators back to the |
232 | 237 | # original PyTorch source code. Each entry is a dictionary where keys are operator names |
|
274 | 279 | # .. code-block:: text |
275 | 280 | # |
276 | 281 | # Top 5 operators with largest numerical discrepancies: |
277 | | -# aot_ops aot_intermediate_output runtime_ops runtime_intermediate_output gap stacktraces |
278 | | -# 59 [transpose_66, linear_44, unflatten_11, unsque... [[[tensor(-0.3346), tensor(0.1540), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.3346), tensor(0.1540), tensor(-0.0... [0.02629170972698486] {'transpose_66': 'File "vit.py", line 125...'} |
279 | | -# 24 [transpose_24, linear_16, unflatten_4, unsquee... [[[tensor(0.0344), tensor(-0.0583), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0344), tensor(-0.0583), tensor(-0.0... [0.010045093258604096] {'transpose_24': 'File "vit.py", line 125...'} |
280 | | -# 29 [transpose_30, linear_20, unflatten_5, unsquee... [[[tensor(0.0457), tensor(0.0266), tensor(-0.0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0457), tensor(0.0266), tensor(-0.05... [0.008497326594593926] {'transpose_30': 'File "vit.py", line 125...'} |
281 | | -# 34 [transpose_36, linear_24, unflatten_6, unsquee... [[[tensor(-0.1336), tensor(-0.0154), tensor(-0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.1336), tensor(-0.0154), tensor(-0.... [0.007672668965640913] {'transpose_36': 'File "vit.py", line 125...'} |
282 | | -# 19 [transpose_18, linear_12, unflatten_3, unsquee... [[[tensor(-0.0801), tensor(0.0458), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.0801), tensor(0.0458), tensor(-0.0... [0.007446783635888463] {'transpose_18': 'File "vit.py", line 125...'} |
283 | | -# |
284 | | -# --- Operator 59 --- |
285 | | -# Operators: ['transpose_66', 'linear_44', 'unflatten_11', 'unsqueeze_11', 'transpose_67'] |
286 | | -# Gap: [0.02629170972698486] |
| 282 | +# aot_debug_handle aot_ops aot_intermediate_output runtime_ops runtime_debug_handle runtime_intermediate_output gap stacktraces |
| 283 | +# 37 (164,) [layer_norm_24] [[[tensor(-0.9172), tensor(0.0853),... [native_call_native_layer_norm.out] (164,) [[[tensor(-0.9172), tensor(0.0853),... [2.2175176622973748e-11] {'layer_norm_24': 'File "vit.py"...'} |
| 284 | +# 33 (144,) [layer_norm_21] [[[tensor(-0.8958), tensor(-0.0307),... [native_call_native_layer_norm.out] (144,) [[[tensor(-0.8958), tensor(-0.0307),... [1.2286585568717539e-11] {'layer_norm_21': 'File "vit.py"...'} |
| 285 | +# 36 (157,) [layer_norm_23] [[[tensor(-0.8750), tensor(-0.0243),... [native_call_native_layer_norm.out] (157,) [[[tensor(-0.8750), tensor(-0.0243),... [1.2271681610366983e-11] {'layer_norm_23': 'File "vit.py"...'} |
| 286 | +# 30 (131,) [layer_norm_19] [[[tensor(-0.4218), tensor(-0.3333),... [native_call_native_layer_norm.out] (131,) [[[tensor(-0.4218), tensor(-0.3333),... [1.1904724456170941e-11] {'layer_norm_19': 'File "vit.py"...'} |
| 287 | +# 24 (105,) [layer_norm_15] [[[tensor(-0.2805), tensor(-0.3079),... [native_call_native_layer_norm.out] (105,) [[[tensor(-0.2805), tensor(-0.3079),... [1.1866889275499194e-11] {'layer_norm_15': 'File "vit.py"...'} |
| 288 | +# |
| 289 | +# --- Operator 37 --- |
| 290 | +# Operators: ['layer_norm_24'] |
| 291 | +# Gap: [2.2175176622973748e-11] |
287 | 292 | # Stack traces: |
288 | | -# transpose_66: |
289 | | -# File "torchvision/models/vision_transformer.py", line 125, in forward |
290 | | -# x = self.self_attention(x) |
| 293 | +# layer_norm_24: |
| 294 | +# File "torchvision/models/vision_transformer.py", line 78, in forward |
| 295 | +# x = self.ln(x) |
291 | 296 | # File "torch/nn/modules/module.py", line 1532, in _wrapped_call_impl |
292 | 297 | # |
293 | 298 | # Operators with MSE > 0.0001: |
294 | | -# aot_ops aot_intermediate_output runtime_ops runtime_intermediate_output gap stacktraces |
295 | | -# 4 [transpose, linear, unflatten, unsqueeze, tran... [[[tensor(0.0045), tensor(-0.0084), tensor(0.0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0045), tensor(-0.0084), tensor(0.00... [0.00010033142876115867] {'transpose': 'File "vit.py", line 125...'} |
296 | | -# 9 [transpose_6, linear_4, unflatten_1, unsqueeze... [[[tensor(0.0113), tensor(-0.0737), tensor(-0.... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(0.0113), tensor(-0.0737), tensor(-0.0... [0.0005611182577030275] {'transpose_6': 'File "vit.py", line 125...'} |
297 | | -# 14 [transpose_12, linear_8, unflatten_2, unsqueez... [[[tensor(-0.0476), tensor(-0.0941), tensor(-0... [DELEGATE_CALL, DELEGATE_CALL, DELEGATE_CALL, ... [[tensor(-0.0476), tensor(-0.0941), tensor(-0.... [0.004658652508649068] {'transpose_12': 'File "vit.py", line 125...'} |
298 | | -# ... |
299 | | -# |
300 | | -# In this example, we can see that the attention layers (transpose + linear + unflatten patterns) |
301 | | -# show the largest numerical discrepancies, which is expected behavior for delegated operators |
302 | | -# using different precision. The ``stacktraces`` column shows that these operators originate from |
303 | | -# ``self.self_attention(x)`` in the ViT model's forward method, helping you identify exactly |
304 | | -# where in your model code the discrepancies arise. |
| 299 | +# Empty DataFrame |
| 300 | +# Columns: [aot_debug_handle, aot_ops, aot_intermediate_output, runtime_ops, runtime_debug_handle, runtime_intermediate_output, gap, stacktraces] |
| 301 | +# Index: [] |
| 302 | +# |
| 303 | +# In this example, the largest numerical gaps come from layer norm operators (gaps ~1e-11), |
| 304 | +# which reflects floating-point rounding at float32 precision — well within acceptable tolerance. |
| 305 | +# Some attention-related operators (e.g. ``linear, unflatten, unsqueeze, transpose`` groups) show |
| 306 | +# ``nan`` gap: this occurs when the AOT op-group output shape does not match the shape of the |
| 307 | +# individual runtime kernel output that was captured for the same debug handle. No operators |
| 308 | +# exceed the 1e-4 threshold, confirming that XNNPACK float32 delegation is numerically accurate. |
305 | 309 |
|
306 | 310 | ###################################################################### |
307 | 311 | # Pipeline 2: CMake Runtime |
|
0 commit comments