Skip to content

Commit ca5dda0

Browse files
Add standalone variance repro script
Summary: Add a small standalone QNN repro script that isolates the variance-channel materialization issue by comparing `torch.ones(...) * scalar -> cat -> conv` against the semantically equivalent `scalar.reshape(...).expand(...) -> cat -> conv`. The script exports `.pte` artifacts, saves raw inputs and reference outputs, and can optionally run the local ExecuTorch portable runtime so the issue can be shared with Qualcomm without the larger DSR debugging harness. Differential Revision: D102723672
1 parent d0b7934 commit ca5dda0

1 file changed

Lines changed: 381 additions & 0 deletions

File tree

Lines changed: 381 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,381 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
"""
8+
Standalone QNN repro for the variance-channel materialization issue.
9+
10+
This script isolates two mathematically equivalent toy models:
11+
12+
1. broken: torch.ones(...) * scalar -> cat -> conv
13+
2. fixed: scalar.reshape(...).expand(...) -> cat -> conv
14+
15+
Both variants use the same randomly initialized conv weights for a given seed.
16+
The script:
17+
- computes eager vs quantized SQNR
18+
- lowers each variant to a QNN-backed `.pte`
19+
- saves inputs and reference outputs
20+
- runs the local ExecuTorch portable runtime on x86 unless `--skip-runtime` is passed
21+
22+
Requirements:
23+
- a Python environment with `torch`, `torchao`, and `executorch`
24+
- `QNN_SDK_ROOT` pointing to a local QNN SDK
25+
- built ExecuTorch portable bindings, unless the script is run with `--skip-runtime`
26+
27+
Example:
28+
export QNN_SDK_ROOT=/path/to/qnn-2.37
29+
python3 qnn_variance_repro.py \
30+
--height 64 \
31+
--width 64 \
32+
--quant-dtype 8a8w \
33+
--variance 1.0 \
34+
--output-dir /tmp/qcom_variance_repro
35+
"""
36+
37+
from __future__ import annotations
38+
39+
import argparse
40+
import ctypes
41+
import json
42+
import math
43+
import os
44+
from pathlib import Path
45+
from typing import Iterable
46+
47+
import executorch.backends.qualcomm # noqa: F401
48+
import torch
49+
import torch.nn as nn
50+
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype
51+
from executorch.backends.qualcomm.serialization.qc_schema import (
52+
QcomChipset,
53+
QnnExecuTorchBackendType,
54+
)
55+
from executorch.backends.qualcomm.utils.utils import (
56+
generate_htp_compiler_spec,
57+
generate_qnn_executorch_compiler_spec,
58+
to_edge_transform_and_lower_to_qnn,
59+
)
60+
from torch.export import export
61+
from torch.utils._pytree import tree_flatten
62+
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
63+
64+
try:
65+
from executorch.extension.pybindings.portable_lib import _load_for_executorch
66+
except ImportError:
67+
_load_for_executorch = None
68+
69+
70+
# Fixed multiplier that both toy models apply to the scalar `variance` input
# before turning it into a full (b, 1, h, w) channel in their forward().
UPSCALE = 18.0
71+
72+
73+
class OnesMulCatConv(nn.Module):
    """Toy model labelled "broken" in the module docstring.

    Builds the variance channel as ``torch.ones(...) * scalar``, concatenates
    it to the input along the channel axis and applies a single 3x3 conv.
    """

    def __init__(self) -> None:
        """Create the bias-free 17 -> 64 channel, 3x3, stride 1, padding 1 conv."""
        super().__init__()
        # 17 input channels: the conv consumes `x` with one extra variance
        # channel appended in forward(), so `x` must carry 16 channels.
        self.conv = nn.Conv2d(17, 64, 3, 1, 1, bias=False)

    def forward(self, x: torch.Tensor, variance: torch.Tensor) -> torch.Tensor:
        """Append a constant variance plane to `x` and run the conv.

        Args:
            x: 4-D input; its shape is unpacked as (b, channels, h, w).
            variance: Tensor multiplied by UPSCALE and broadcast against a
                (b, 1, h, w) tensor of ones.

        Returns:
            Output of ``self.conv`` on the channel-wise concatenation.
        """
        avg_variance = variance * UPSCALE
        b, _, h, w = x.shape
        # Materialize the plane via ones * scalar; this is the op pattern the
        # repro is about, so it must stay exactly in this form.
        variance_tensor = (
            torch.ones((b, 1, h, w), device=x.device, dtype=x.dtype) * avg_variance
        )
        return self.conv(torch.cat((x, variance_tensor), dim=1))
85+
86+
87+
class ExpandCatConv(nn.Module):
    """Toy model labelled "fixed" in the module docstring.

    Builds the variance channel with ``reshape(...).expand(...)`` instead of
    multiplying a tensor of ones, then concatenates and applies the same conv
    layout as the other variant.
    """

    def __init__(self) -> None:
        """Create the bias-free 17 -> 64 channel, 3x3, stride 1, padding 1 conv."""
        super().__init__()
        # 17 input channels: `x` plus the single variance channel appended in
        # forward(), so `x` must carry 16 channels.
        self.conv = nn.Conv2d(17, 64, 3, 1, 1, bias=False)

    def forward(self, x: torch.Tensor, variance: torch.Tensor) -> torch.Tensor:
        """Append an expanded variance plane to `x` and run the conv.

        Args:
            x: 4-D input; its shape is unpacked as (b, channels, h, w).
            variance: Tensor multiplied by UPSCALE; the reshape below requires
                it to hold exactly `b` elements.

        Returns:
            Output of ``self.conv`` on the channel-wise concatenation.
        """
        avg_variance = variance * UPSCALE
        b, _, h, w = x.shape
        # reshape -> expand yields a (b, 1, h, w) view without a ones * scalar
        # multiply; this op pattern is the point of the variant.
        variance_tensor = avg_variance.reshape(b, 1, 1, 1).expand(b, 1, h, w)
        return self.conv(torch.cat((x, variance_tensor), dim=1))
97+
98+
99+
# Maps each concrete `--variant` CLI choice to the model class implementing it.
# The key order is also the order in which `--variant both` runs the variants.
VARIANT_FACTORIES = {
    "ones_mul_cat_conv": OnesMulCatConv,
    "expand_cat_conv": ExpandCatConv,
}
103+
104+
105+
def _positive_int(text: str) -> int:
    """argparse ``type=`` converter that accepts only integers >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"expected a positive integer, got {value}"
        )
    return value


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the repro script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        Namespace with ``variant``, ``height``, ``width``, ``seed``,
        ``variance``, ``calibration_runs``, ``calibration_variance``,
        ``quant_dtype``, ``soc_model``, ``output_dir`` and ``skip_runtime``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a bullet list and a shell example;
        # the default formatter would reflow them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--variant",
        choices=("both", *VARIANT_FACTORIES.keys()),
        default="both",
        help="Which toy model to run.",
    )
    # Spatial size and calibration count must be >= 1; rejecting bad values
    # here avoids an obscure failure later during export/quantization.
    parser.add_argument("--height", type=_positive_int, default=64)
    parser.add_argument("--width", type=_positive_int, default=64)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--variance", type=float, default=1.0)
    parser.add_argument(
        "--calibration-runs",
        type=_positive_int,
        default=3,
        help="Number of random calibration passes per model.",
    )
    parser.add_argument(
        "--calibration-variance",
        type=float,
        default=0.5,
        help="Scalar variance value used during calibration.",
    )
    parser.add_argument(
        "--quant-dtype",
        choices=("8a8w", "16a8w"),
        default="16a8w",
    )
    parser.add_argument(
        "--soc-model",
        default="SM8750",
        help="Name of the QcomChipset enum member to use, e.g. SM8650 or SM8750.",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("/tmp/qcom_variance_repro"),
        help="Directory for generated artifacts.",
    )
    parser.add_argument(
        "--skip-runtime",
        action="store_true",
        help="Export `.pte` and references only; do not run the local portable runtime.",
    )
    return parser.parse_args(argv)
151+
152+
153+
def ensure_qnn_env() -> None:
    """Validate the QNN SDK location and expose its x86 libraries.

    Reads ``QNN_SDK_ROOT``, prepends ``<root>/lib/x86_64-linux-clang`` to
    ``LD_LIBRARY_PATH`` when it is not already listed, and preloads the QNN
    runtime libraries found in that directory.

    Raises:
        RuntimeError: If ``QNN_SDK_ROOT`` is unset/empty or the x86 library
            directory does not exist.
    """
    sdk_root = os.environ.get("QNN_SDK_ROOT")
    if not sdk_root:
        raise RuntimeError(
            "QNN_SDK_ROOT is not set. Importing `executorch.backends.qualcomm` "
            "should auto-stage the SDK on supported x86 hosts; otherwise set "
            "QNN_SDK_ROOT manually before running."
        )

    lib_dir = Path(sdk_root, "lib", "x86_64-linux-clang")
    if not lib_dir.is_dir():
        raise RuntimeError(f"QNN x86 lib directory not found: {lib_dir}")

    lib_dir_text = str(lib_dir)
    search_entries = [
        entry
        for entry in os.environ.get("LD_LIBRARY_PATH", "").split(":")
        if entry
    ]
    if lib_dir_text not in search_entries:
        os.environ["LD_LIBRARY_PATH"] = ":".join([lib_dir_text, *search_entries])

    # Best-effort preload for in-process x86 runtime execution.
    # Changing LD_LIBRARY_PATH after Python has started is not sufficient for
    # all later dlopen-by-name paths, so load the QNN runtime libraries here.
    for library in ("libQnnSystem.so", "libQnnHtp.so"):
        candidate = lib_dir / library
        if candidate.is_file():
            ctypes.CDLL(str(candidate), mode=ctypes.RTLD_GLOBAL)
178+
179+
180+
def sqnr_db(reference: torch.Tensor, other: torch.Tensor) -> float:
    """Return the signal-to-quantization-noise ratio of `other` in dB.

    Both tensors are detached and cast to float32. The signal is the sum of
    squares of `reference`; the noise is the sum of squared differences.

    Returns:
        ``math.inf`` when the noise is zero, ``-math.inf`` when the noise is
        non-zero but the signal is zero, otherwise ``10 * log10(signal / noise)``.
    """
    ref = reference.detach().float()
    candidate = other.detach().float()

    noise_power = torch.sum((ref - candidate) ** 2)
    if noise_power == 0:
        return math.inf

    signal_power = torch.sum(ref**2)
    if signal_power == 0:
        return -math.inf

    ratio_log10 = torch.log10(signal_power / noise_power).item()
    return 10.0 * ratio_log10
190+
191+
192+
def max_abs_diff(reference: torch.Tensor, other: torch.Tensor) -> float:
    """Return the largest element-wise absolute difference as a Python float.

    Both tensors are detached and cast to float32 before subtracting.
    """
    ref = reference.detach().float()
    candidate = other.detach().float()
    delta = torch.abs(ref - candidate)
    return torch.max(delta).item()
194+
195+
196+
def save_tensor(path: Path, tensor: torch.Tensor) -> None:
    """Write a detached CPU copy of `tensor` to `path` with ``torch.save``.

    Missing parent directories of `path` are created first.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    cpu_copy = tensor.detach().cpu()
    torch.save(cpu_copy, path)
199+
200+
201+
def save_raw(path: Path, tensor: torch.Tensor) -> None:
    """Dump `tensor` to `path` as headerless float32 values.

    The tensor is detached, moved to CPU, cast to float32 and made contiguous
    before its NumPy view is written with ``tofile``. Missing parent
    directories of `path` are created first.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    as_float32 = tensor.detach().cpu().to(torch.float32).contiguous()
    as_float32.numpy().tofile(path)
204+
205+
206+
def build_model(variant: str, seed: int) -> nn.Module:
    """Instantiate the requested toy model in eval mode.

    The global torch RNG is seeded with `seed` before construction so that
    the randomly initialized weights are reproducible for a given seed.

    Raises:
        KeyError: If `variant` is not a key of ``VARIANT_FACTORIES``.
    """
    torch.manual_seed(seed)
    model = VARIANT_FACTORIES[variant]()
    return model.eval()
209+
210+
211+
def build_inputs(
    height: int, width: int, seed: int, variance: float
) -> tuple[torch.Tensor, torch.Tensor]:
    """Create the sample inputs fed to every variant.

    Returns:
        A pair ``(x, variance_tensor)``: `x` is standard-normal noise of shape
        (1, 16, height, width) drawn after seeding the global torch RNG with
        `seed`; `variance_tensor` is a one-element float32 tensor holding
        `variance`.
    """
    torch.manual_seed(seed)
    image_shape = (1, 16, height, width)
    noise = torch.randn(*image_shape)
    scalar = torch.tensor([variance], dtype=torch.float32)
    return noise, scalar
218+
219+
220+
def calibrate(
    prepared: nn.Module,
    reference_input: torch.Tensor,
    calibration_runs: int,
    calibration_variance: float,
    seed: int,
) -> None:
    """Feed random calibration samples through `prepared`.

    Each of the `calibration_runs` passes reseeds the global torch RNG with
    ``seed + 1000 + run_index`` and calls `prepared` under ``torch.no_grad()``
    with noise shaped like `reference_input` plus a one-element float32 tensor
    holding `calibration_variance`.
    """
    scalar = torch.tensor([calibration_variance], dtype=torch.float32)
    first_seed = seed + 1000
    with torch.no_grad():
        for run_seed in range(first_seed, first_seed + calibration_runs):
            torch.manual_seed(run_seed)
            sample = torch.randn_like(reference_input)
            prepared(sample, scalar)
232+
233+
234+
def get_soc_model(name: str) -> QcomChipset:
    """Look up a ``QcomChipset`` member by its name.

    Uses enum name lookup (``QcomChipset[name]``) rather than ``getattr`` so
    that only real members are accepted; ``getattr`` would also return
    arbitrary class attributes such as ``__doc__`` or ``mro``.

    Args:
        name: Enum member name, e.g. ``"SM8750"``.

    Returns:
        The matching ``QcomChipset`` member.

    Raises:
        ValueError: If `name` is not a member name; the message lists the
            valid names.
    """
    try:
        return QcomChipset[name]
    except KeyError as exc:
        valid = sorted(item.name for item in QcomChipset)
        raise ValueError(
            f"Unknown QcomChipset '{name}'. Valid values: {valid}"
        ) from exc
242+
243+
244+
def get_quant_dtype(name: str) -> QuantDtype:
    """Translate a ``--quant-dtype`` string into a ``QuantDtype`` member.

    Args:
        name: Either ``"8a8w"`` or ``"16a8w"``.

    Returns:
        ``QuantDtype.use_8a8w`` or ``QuantDtype.use_16a8w`` respectively.

    Raises:
        ValueError: If `name` is neither of the supported strings. Unknown
            names are rejected instead of silently falling back to 16a8w.
    """
    supported = {
        "8a8w": QuantDtype.use_8a8w,
        "16a8w": QuantDtype.use_16a8w,
    }
    try:
        return supported[name]
    except KeyError as exc:
        raise ValueError(
            f"Unknown quant dtype '{name}'. Valid values: {sorted(supported)}"
        ) from exc
246+
247+
248+
def run_portable_runtime(
    pte_path: Path,
    sample_inputs: tuple[torch.Tensor, torch.Tensor],
) -> torch.Tensor:
    """Execute a `.pte` program in-process and return its first output.

    Args:
        pte_path: Path of the serialized program to load.
        sample_inputs: Inputs that are flattened and passed to ``forward``.

    Returns:
        The first output of the program, converted with ``torch.tensor`` when
        it is not already a tensor.

    Raises:
        RuntimeError: If the portable runtime bindings could not be imported
            or the program returns no outputs.
    """
    if _load_for_executorch is None:
        raise RuntimeError(
            "executorch portable runtime bindings are not available. "
            "Build/install `executorch.extension.pybindings.portable_lib` "
            "or rerun with --skip-runtime."
        )

    program = _load_for_executorch(str(pte_path))
    leaves = tree_flatten(sample_inputs)[0]
    results = program.forward(leaves)
    if not results:
        raise RuntimeError("Portable runtime returned no outputs")

    first = results[0]
    return first if isinstance(first, torch.Tensor) else torch.tensor(first)
268+
269+
270+
def variant_names(requested: str) -> Iterable[str]:
    """Expand the ``--variant`` choice into the variant names to run.

    ``"both"`` yields every key of ``VARIANT_FACTORIES``; any other value is
    returned unchanged as a one-element tuple.
    """
    return VARIANT_FACTORIES.keys() if requested == "both" else (requested,)
274+
275+
276+
def run_variant(
    variant: str,
    args: argparse.Namespace,
    x: torch.Tensor,
    variance: torch.Tensor,
    soc_model: QcomChipset,
    quant_dtype: QuantDtype,
) -> dict[str, object]:
    """Quantize, lower and (optionally) execute one toy-model variant.

    All artifacts are written below ``args.output_dir / variant``.

    Args:
        variant: Key of ``VARIANT_FACTORIES`` selecting the model.
        args: Parsed CLI options; this function reads ``output_dir``,
            ``seed``, ``calibration_runs``, ``calibration_variance`` and
            ``skip_runtime``.
        x: Sample input passed as the model's first argument.
        variance: Sample input passed as the model's second argument.
        soc_model: Chipset handed to the quantizer and the compiler spec.
        quant_dtype: Default quantization config for the quantizer.

    Returns:
        Dict with ``variant``, ``pte_path`` and the eager-vs-quantized SQNR /
        max-abs-diff; when the runtime is not skipped it also contains the
        quantized-vs-runtime SQNR / max-abs-diff.
    """
    out_dir = args.output_dir / variant
    out_dir.mkdir(parents=True, exist_ok=True)

    # Float reference output from the freshly seeded eager model.
    model = build_model(variant, args.seed)
    with torch.no_grad():
        eager_out = model(x, variance)

    quantizer = QnnQuantizer(
        backend=QnnExecuTorchBackendType.kHtpBackend,
        soc_model=soc_model,
    )
    quantizer.set_default_quant_config(
        quant_dtype=quant_dtype,
        is_qat=False,
        is_conv_per_channel=True,
        is_linear_per_channel=False,
    )

    # PT2E flow: export -> prepare -> calibrate -> convert. The order matters;
    # calibration has to run on the prepared module before conversion.
    exported = export(model, (x, variance), strict=False).module()
    prepared = prepare_pt2e(exported, quantizer)
    calibrate(
        prepared,
        x,
        args.calibration_runs,
        args.calibration_variance,
        args.seed,
    )
    quantized = convert_pt2e(prepared)

    # Output of the converted (quantized) module, evaluated in Python.
    with torch.no_grad():
        quantized_out = quantized(x, variance)

    # Lower the quantized module to a QNN-delegated ExecuTorch program.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=soc_model,
        backend_options=backend_options,
    )
    edge_program = to_edge_transform_and_lower_to_qnn(
        module=quantized,
        inputs=(x, variance),
        compiler_specs=compiler_specs,
    )
    executorch_program = edge_program.to_executorch()

    pte_path = out_dir / "model.pte"
    with open(pte_path, "wb") as file:
        executorch_program.write_to_file(file)

    # Persist inputs and references both as torch pickles (.pt) and as raw
    # float32 dumps (.raw). Note: eager_out is saved as .pt only.
    save_tensor(out_dir / "input_0.pt", x)
    save_tensor(out_dir / "input_1.pt", variance)
    save_tensor(out_dir / "eager_out.pt", eager_out)
    save_tensor(out_dir / "quantized_out.pt", quantized_out)
    save_raw(out_dir / "input_0.raw", x)
    save_raw(out_dir / "input_1.raw", variance)
    save_raw(out_dir / "quantized_out.raw", quantized_out)

    result: dict[str, object] = {
        "variant": variant,
        "pte_path": str(pte_path),
        "eager_vs_quant_sqnr_db": sqnr_db(eager_out, quantized_out),
        "eager_vs_quant_max_abs_diff": max_abs_diff(eager_out, quantized_out),
    }

    # The runtime comparison runs by default; run_portable_runtime raises if
    # the portable bindings are unavailable.
    if not args.skip_runtime:
        runtime_out = run_portable_runtime(pte_path, (x, variance))
        save_tensor(out_dir / "runtime_out.pt", runtime_out)
        save_raw(out_dir / "runtime_out.raw", runtime_out)
        result["quant_vs_runtime_sqnr_db"] = sqnr_db(quantized_out, runtime_out)
        result["quant_vs_runtime_max_abs_diff"] = max_abs_diff(
            quantized_out, runtime_out
        )

    return result
357+
358+
359+
def _json_safe(value: object) -> object:
    """Return `value` with non-finite floats replaced by their string form.

    ``json.dumps`` would otherwise emit ``Infinity`` / ``NaN``, which are not
    valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, float) and not math.isfinite(value):
        return str(value)  # "inf", "-inf" or "nan"
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    return value


def main() -> None:
    """Run the requested variants and write ``summary.json``.

    Parses the CLI, prepares the QNN environment, builds one shared pair of
    sample inputs, runs every requested variant, prints each result as JSON
    and finally writes all results to ``<output-dir>/summary.json``.

    Metrics can be non-finite (the SQNR is +/-inf when the noise or the signal
    is zero, and a max-abs-diff can be NaN), so results are passed through
    ``_json_safe`` and serialized with ``allow_nan=False`` to keep the printed
    output and the summary file strictly valid JSON.
    """
    args = parse_args()
    ensure_qnn_env()
    args.output_dir.mkdir(parents=True, exist_ok=True)

    x, variance = build_inputs(args.height, args.width, args.seed, args.variance)
    soc_model = get_soc_model(args.soc_model)
    quant_dtype = get_quant_dtype(args.quant_dtype)

    results = []
    for variant in variant_names(args.variant):
        print(f"Running variant: {variant}")
        result = _json_safe(
            run_variant(variant, args, x, variance, soc_model, quant_dtype)
        )
        print(json.dumps(result, indent=2, allow_nan=False))
        results.append(result)

    summary_path = args.output_dir / "summary.json"
    summary_path.write_text(json.dumps(results, indent=2, allow_nan=False) + "\n")
    print(f"Wrote summary to {summary_path}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)