|
| 1 | +# Copyright (c) Qualcomm Innovation Center, Inc. |
| 2 | +# All rights reserved |
| 3 | +# |
| 4 | +# This source code is licensed under the BSD-style license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. |
| 6 | + |
| 7 | +""" |
| 8 | +Standalone QNN repro for the variance-channel materialization issue. |
| 9 | +
|
| 10 | +This script isolates two mathematically equivalent toy models: |
| 11 | +
|
| 12 | +1. broken: torch.ones(...) * scalar -> cat -> conv |
| 13 | +2. fixed: scalar.reshape(...).expand(...) -> cat -> conv |
| 14 | +
|
| 15 | +Both variants use the same randomly initialized conv weights for a given seed. |
| 16 | +The script: |
| 17 | + - computes eager vs quantized SQNR |
| 18 | + - lowers each variant to a QNN-backed `.pte` |
| 19 | + - saves inputs and reference outputs |
| 20 | + - runs the local ExecuTorch portable runtime on x86 unless `--skip-runtime` is passed |
| 21 | +
|
| 22 | +Requirements: |
| 23 | + - a Python environment with `torch`, `torchao`, and `executorch` |
| 24 | + - `QNN_SDK_ROOT` pointing to a local QNN SDK |
| 25 | + - built ExecuTorch portable bindings, unless running with `--skip-runtime` |
| 26 | +
|
| 27 | +Example: |
| 28 | + export QNN_SDK_ROOT=/path/to/qnn-2.37 |
| 29 | + python3 qnn_variance_repro.py \ |
| 30 | + --height 64 \ |
| 31 | + --width 64 \ |
| 32 | + --quant-dtype 8a8w \ |
| 33 | + --variance 1.0 \ |
| 34 | + --output-dir /tmp/qcom_variance_repro |
| 35 | +""" |
| 36 | + |
| 37 | +from __future__ import annotations |
| 38 | + |
| 39 | +import argparse |
| 40 | +import ctypes |
| 41 | +import json |
| 42 | +import math |
| 43 | +import os |
| 44 | +from pathlib import Path |
| 45 | +from typing import Iterable |
| 46 | + |
| 47 | +import executorch.backends.qualcomm # noqa: F401 |
| 48 | +import torch |
| 49 | +import torch.nn as nn |
| 50 | +from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype |
| 51 | +from executorch.backends.qualcomm.serialization.qc_schema import ( |
| 52 | + QcomChipset, |
| 53 | + QnnExecuTorchBackendType, |
| 54 | +) |
| 55 | +from executorch.backends.qualcomm.utils.utils import ( |
| 56 | + generate_htp_compiler_spec, |
| 57 | + generate_qnn_executorch_compiler_spec, |
| 58 | + to_edge_transform_and_lower_to_qnn, |
| 59 | +) |
| 60 | +from torch.export import export |
| 61 | +from torch.utils._pytree import tree_flatten |
| 62 | +from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e |
| 63 | + |
| 64 | +try: |
| 65 | + from executorch.extension.pybindings.portable_lib import _load_for_executorch |
| 66 | +except ImportError: |
| 67 | + _load_for_executorch = None |
| 68 | + |
| 69 | + |
# Fixed multiplier applied to the scalar `variance` input in both toy models
# before the result is turned into the extra input channel.
UPSCALE = 18.0
| 71 | + |
| 72 | + |
class OnesMulCatConv(nn.Module):
    """Toy model that builds the variance channel as ``ones * scalar``.

    The scaled variance is multiplied into a freshly created ones tensor of
    shape ``(b, 1, h, w)``, concatenated to ``x`` along dim 1, and passed
    through a bias-free 3x3 convolution (17 -> 64 channels).
    """

    def __init__(self) -> None:
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=17,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )

    def forward(self, x: torch.Tensor, variance: torch.Tensor) -> torch.Tensor:
        """Return ``conv(cat(x, ones * (variance * UPSCALE)))``."""
        scaled_variance = variance * UPSCALE
        batch, _, height, width = x.shape
        # Keep the explicit ones tensor: this op pattern is what the variant
        # exists to exercise.
        ones_plane = torch.ones(
            (batch, 1, height, width), device=x.device, dtype=x.dtype
        )
        variance_plane = ones_plane * scaled_variance
        stacked = torch.cat((x, variance_plane), dim=1)
        return self.conv(stacked)
| 85 | + |
| 86 | + |
class ExpandCatConv(nn.Module):
    """Toy model that builds the variance channel via ``reshape`` + ``expand``.

    The scaled variance is reshaped to ``(b, 1, 1, 1)``, expanded to
    ``(b, 1, h, w)``, concatenated to ``x`` along dim 1, and passed through a
    bias-free 3x3 convolution (17 -> 64 channels).
    """

    def __init__(self) -> None:
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=17,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )

    def forward(self, x: torch.Tensor, variance: torch.Tensor) -> torch.Tensor:
        """Return ``conv(cat(x, expand(variance * UPSCALE)))``."""
        scaled_variance = variance * UPSCALE
        batch, _, height, width = x.shape
        per_sample = scaled_variance.reshape(batch, 1, 1, 1)
        variance_plane = per_sample.expand(batch, 1, height, width)
        stacked = torch.cat((x, variance_plane), dim=1)
        return self.conv(stacked)
| 97 | + |
| 98 | + |
# CLI variant name -> module class used to build that variant. The insertion
# order is the order in which `--variant both` runs the models.
VARIANT_FACTORIES = {
    "ones_mul_cat_conv": OnesMulCatConv,
    "expand_cat_conv": ExpandCatConv,
}
| 103 | + |
| 104 | + |
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the repro script.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        Namespace with ``variant``, ``height``, ``width``, ``seed``,
        ``variance``, ``calibration_runs``, ``calibration_variance``,
        ``quant_dtype``, ``soc_model``, ``output_dir`` and ``skip_runtime``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains lists and a shell example; the default
        # formatter would re-wrap them into one paragraph in `--help`.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--variant",
        choices=("both", *VARIANT_FACTORIES.keys()),
        default="both",
        help="Which toy model to run.",
    )
    parser.add_argument("--height", type=int, default=64)
    parser.add_argument("--width", type=int, default=64)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--variance", type=float, default=1.0)
    parser.add_argument(
        "--calibration-runs",
        type=int,
        default=3,
        help="Number of random calibration passes per model.",
    )
    parser.add_argument(
        "--calibration-variance",
        type=float,
        default=0.5,
        help="Scalar variance value used during calibration.",
    )
    parser.add_argument(
        "--quant-dtype",
        choices=("8a8w", "16a8w"),
        default="16a8w",
    )
    parser.add_argument(
        "--soc-model",
        default="SM8750",
        help="Name of the QcomChipset enum member to use, e.g. SM8650 or SM8750.",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("/tmp/qcom_variance_repro"),
        help="Directory for generated artifacts.",
    )
    parser.add_argument(
        "--skip-runtime",
        action="store_true",
        help="Export `.pte` and references only; do not run the local portable runtime.",
    )
    return parser.parse_args(argv)
| 151 | + |
| 152 | + |
def ensure_qnn_env() -> None:
    """Validate ``QNN_SDK_ROOT`` and expose the x86 QNN libraries to this process.

    Prepends ``$QNN_SDK_ROOT/lib/x86_64-linux-clang`` to ``LD_LIBRARY_PATH``
    (unless it is already listed) and then tries to preload the QNN runtime
    libraries found in that directory. A library that exists but cannot be
    loaded is reported on stderr and skipped; it does not abort the script.

    Raises:
        RuntimeError: If ``QNN_SDK_ROOT`` is unset or empty, or if the x86
            library directory does not exist.
    """
    import sys

    sdk_root = os.environ.get("QNN_SDK_ROOT")
    if not sdk_root:
        raise RuntimeError(
            "QNN_SDK_ROOT is not set. Importing `executorch.backends.qualcomm` "
            "should auto-stage the SDK on supported x86 hosts; otherwise set "
            "QNN_SDK_ROOT manually before running."
        )

    lib_dir = Path(sdk_root) / "lib" / "x86_64-linux-clang"
    if not lib_dir.is_dir():
        raise RuntimeError(f"QNN x86 lib directory not found: {lib_dir}")

    search_path = os.environ.get("LD_LIBRARY_PATH", "")
    entries = [entry for entry in search_path.split(os.pathsep) if entry]
    if str(lib_dir) not in entries:
        os.environ["LD_LIBRARY_PATH"] = os.pathsep.join([str(lib_dir), *entries])

    # Best-effort preload for in-process x86 runtime execution.
    # Setting LD_LIBRARY_PATH after Python starts is not sufficient for all
    # later dlopen-by-name paths, so preload the QNN runtime libraries directly.
    for lib_name in ("libQnnSystem.so", "libQnnHtp.so"):
        lib_path = lib_dir / lib_name
        if not lib_path.is_file():
            continue
        try:
            ctypes.CDLL(str(lib_path), mode=ctypes.RTLD_GLOBAL)
        except OSError as exc:
            # Keep the preload best-effort: a failed dlopen here must not
            # prevent the rest of the script from running.
            print(f"Warning: could not preload {lib_path}: {exc}", file=sys.stderr)
| 178 | + |
| 179 | + |
def sqnr_db(reference: torch.Tensor, other: torch.Tensor) -> float:
    """Return the signal-to-quantization-noise ratio of ``other`` in dB.

    Signal is the sum of squares of ``reference``; noise is the sum of squared
    differences between ``reference`` and ``other``. Both tensors are detached
    and converted with ``.float()`` first.

    Returns:
        ``math.inf`` when the noise is exactly zero, ``-math.inf`` when the
        noise is non-zero but the signal is exactly zero, otherwise
        ``10 * log10(signal / noise)``.
    """
    ref = reference.detach().float()
    approx = other.detach().float()

    noise_power = torch.sum(torch.square(ref - approx))
    if noise_power == 0:
        return math.inf

    signal_power = torch.sum(torch.square(ref))
    if signal_power == 0:
        return -math.inf

    ratio = signal_power / noise_power
    return 10.0 * torch.log10(ratio).item()
| 190 | + |
| 191 | + |
def max_abs_diff(reference: torch.Tensor, other: torch.Tensor) -> float:
    """Return the largest element-wise absolute difference as a Python float."""
    delta = reference.detach().float() - other.detach().float()
    return torch.max(torch.abs(delta)).item()
| 194 | + |
| 195 | + |
def save_tensor(path: Path, tensor: torch.Tensor) -> None:
    """Write ``tensor`` (detached, moved to CPU) to ``path`` with ``torch.save``.

    Missing parent directories of ``path`` are created first.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    payload = tensor.detach().cpu()
    torch.save(payload, path)
| 199 | + |
| 200 | + |
def save_raw(path: Path, tensor: torch.Tensor) -> None:
    """Dump ``tensor`` to ``path`` as a headerless float32 binary file.

    The tensor is detached, moved to CPU, cast to ``torch.float32`` and made
    contiguous before its NumPy view is written with ``ndarray.tofile``.
    Missing parent directories of ``path`` are created first.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    as_float32 = tensor.detach().cpu().to(torch.float32).contiguous()
    as_float32.numpy().tofile(path)
| 204 | + |
| 205 | + |
def build_model(variant: str, seed: int) -> nn.Module:
    """Instantiate the requested variant in eval mode with seeded weights.

    The global torch RNG is seeded with ``seed`` right before construction, so
    every variant built with the same seed draws its initial weights from the
    same RNG state.

    Raises:
        KeyError: If ``variant`` is not a key of ``VARIANT_FACTORIES``.
    """
    torch.manual_seed(seed)
    factory = VARIANT_FACTORIES[variant]
    model = factory()
    return model.eval()
| 209 | + |
| 210 | + |
def build_inputs(
    height: int, width: int, seed: int, variance: float
) -> tuple[torch.Tensor, torch.Tensor]:
    """Create the deterministic sample inputs shared by all variants.

    Returns:
        A pair ``(x, variance_tensor)``: ``x`` is a ``torch.randn`` tensor of
        shape ``(1, 16, height, width)`` drawn after seeding the global RNG
        with ``seed``; ``variance_tensor`` is a one-element float32 tensor
        holding ``variance``.
    """
    torch.manual_seed(seed)
    image_shape = (1, 16, height, width)
    sample = torch.randn(image_shape)
    scalar = torch.tensor([variance], dtype=torch.float32)
    return sample, scalar
| 218 | + |
| 219 | + |
def calibrate(
    prepared: nn.Module,
    reference_input: torch.Tensor,
    calibration_runs: int,
    calibration_variance: float,
    seed: int,
) -> None:
    """Feed ``prepared`` random calibration batches under ``torch.no_grad``.

    Each of the ``calibration_runs`` passes reseeds the global torch RNG with
    ``seed + 1000 + index`` and calls ``prepared`` with a fresh
    ``torch.randn_like(reference_input)`` plus a one-element float32 tensor
    holding ``calibration_variance``.
    """
    scalar = torch.tensor([calibration_variance], dtype=torch.float32)
    first_seed = seed + 1000
    with torch.no_grad():
        for run_seed in range(first_seed, first_seed + calibration_runs):
            torch.manual_seed(run_seed)
            batch = torch.randn_like(reference_input)
            prepared(batch, scalar)
| 232 | + |
| 233 | + |
def get_soc_model(name: str) -> QcomChipset:
    """Look up a ``QcomChipset`` member by its name.

    Uses the member lookup ``QcomChipset[name]`` rather than ``getattr`` so
    that only real members are accepted; ``getattr`` would also return
    unrelated class attributes such as methods or dunder names.

    Args:
        name: Member name, e.g. the value passed to ``--soc-model``.

    Returns:
        The matching ``QcomChipset`` member.

    Raises:
        ValueError: If ``name`` is not a member name; the message lists the
            valid names.
    """
    try:
        return QcomChipset[name]
    except KeyError as exc:
        valid = sorted(item.name for item in QcomChipset)
        raise ValueError(
            f"Unknown QcomChipset '{name}'. Valid values: {valid}"
        ) from exc
| 242 | + |
| 243 | + |
def get_quant_dtype(name: str) -> QuantDtype:
    """Translate a ``--quant-dtype`` string into the matching ``QuantDtype``.

    Args:
        name: Either ``"8a8w"`` or ``"16a8w"``.

    Returns:
        ``QuantDtype.use_8a8w`` or ``QuantDtype.use_16a8w`` respectively.

    Raises:
        ValueError: If ``name`` is neither of the supported strings. Failing
            loudly avoids silently quantizing with 16a8w on a typo or on a
            newly added choice that was not wired up here.
    """
    if name == "8a8w":
        return QuantDtype.use_8a8w
    if name == "16a8w":
        return QuantDtype.use_16a8w
    raise ValueError(
        f"Unknown quant dtype '{name}'. Valid values: ['16a8w', '8a8w']"
    )
| 246 | + |
| 247 | + |
def run_portable_runtime(
    pte_path: Path,
    sample_inputs: tuple[torch.Tensor, torch.Tensor],
) -> torch.Tensor:
    """Execute a ``.pte`` file through the portable pybindings.

    Args:
        pte_path: Path of the program to load.
        sample_inputs: Inputs for ``forward``; they are flattened with
            ``tree_flatten`` before the call.

    Returns:
        The first output of ``forward``, wrapped with ``torch.tensor`` if it
        is not already a ``torch.Tensor``.

    Raises:
        RuntimeError: If the portable bindings could not be imported, or if
            ``forward`` returns no outputs.
    """
    loader = _load_for_executorch
    if loader is None:
        raise RuntimeError(
            "executorch portable runtime bindings are not available. "
            "Build/install `executorch.extension.pybindings.portable_lib` "
            "or rerun with --skip-runtime."
        )

    program = loader(str(pte_path))
    leaves, _spec = tree_flatten(sample_inputs)
    results = program.forward(leaves)
    if not results:
        raise RuntimeError("Portable runtime returned no outputs")

    first = results[0]
    if isinstance(first, torch.Tensor):
        return first
    return torch.tensor(first)
| 268 | + |
| 269 | + |
def variant_names(requested: str) -> Iterable[str]:
    """Expand the ``--variant`` value into the variant names to run.

    ``"both"`` yields every key of ``VARIANT_FACTORIES``; any other value is
    returned as a one-element tuple.
    """
    return VARIANT_FACTORIES.keys() if requested == "both" else (requested,)
| 274 | + |
| 275 | + |
def run_variant(
    variant: str,
    args: argparse.Namespace,
    x: torch.Tensor,
    variance: torch.Tensor,
    soc_model: QcomChipset,
    quant_dtype: QuantDtype,
) -> dict[str, object]:
    """Quantize, lower and (unless skipped) execute one toy model variant.

    All artifacts are written below ``args.output_dir / variant``.

    Args:
        variant: Key of ``VARIANT_FACTORIES`` selecting the model.
        args: Parsed CLI options; this function reads ``output_dir``, ``seed``,
            ``calibration_runs``, ``calibration_variance`` and ``skip_runtime``.
        x: Sample input tensor passed as the model's first argument.
        variance: Sample variance tensor passed as the model's second argument.
        soc_model: Chipset handed to the quantizer and to the compiler spec.
        quant_dtype: Default quantization config dtype for the quantizer.

    Returns:
        A dict with ``variant``, ``pte_path``, ``eager_vs_quant_sqnr_db`` and
        ``eager_vs_quant_max_abs_diff``; when the runtime is not skipped it
        also contains ``quant_vs_runtime_sqnr_db`` and
        ``quant_vs_runtime_max_abs_diff``.
    """
    out_dir = args.output_dir / variant
    out_dir.mkdir(parents=True, exist_ok=True)

    # Float reference: output of the freshly built, un-quantized model.
    model = build_model(variant, args.seed)
    with torch.no_grad():
        eager_out = model(x, variance)

    quantizer = QnnQuantizer(
        backend=QnnExecuTorchBackendType.kHtpBackend,
        soc_model=soc_model,
    )
    quantizer.set_default_quant_config(
        quant_dtype=quant_dtype,
        is_qat=False,
        is_conv_per_channel=True,
        is_linear_per_channel=False,
    )

    # PT2E flow: export -> prepare -> calibrate -> convert.
    exported = export(model, (x, variance), strict=False).module()
    prepared = prepare_pt2e(exported, quantizer)
    calibrate(
        prepared,
        x,
        args.calibration_runs,
        args.calibration_variance,
        args.seed,
    )
    quantized = convert_pt2e(prepared)

    # Output of the converted module, called directly on the sample inputs.
    with torch.no_grad():
        quantized_out = quantized(x, variance)

    # Lower the converted module to QNN and serialize it as a `.pte` file.
    backend_options = generate_htp_compiler_spec(use_fp16=False)
    compiler_specs = generate_qnn_executorch_compiler_spec(
        soc_model=soc_model,
        backend_options=backend_options,
    )
    edge_program = to_edge_transform_and_lower_to_qnn(
        module=quantized,
        inputs=(x, variance),
        compiler_specs=compiler_specs,
    )
    executorch_program = edge_program.to_executorch()

    pte_path = out_dir / "model.pte"
    with open(pte_path, "wb") as file:
        executorch_program.write_to_file(file)

    # `.pt` files are written with torch.save; `.raw` files are float32 dumps.
    # Only the inputs and the quantized output get a `.raw` copy here.
    save_tensor(out_dir / "input_0.pt", x)
    save_tensor(out_dir / "input_1.pt", variance)
    save_tensor(out_dir / "eager_out.pt", eager_out)
    save_tensor(out_dir / "quantized_out.pt", quantized_out)
    save_raw(out_dir / "input_0.raw", x)
    save_raw(out_dir / "input_1.raw", variance)
    save_raw(out_dir / "quantized_out.raw", quantized_out)

    result: dict[str, object] = {
        "variant": variant,
        "pte_path": str(pte_path),
        "eager_vs_quant_sqnr_db": sqnr_db(eager_out, quantized_out),
        "eager_vs_quant_max_abs_diff": max_abs_diff(eager_out, quantized_out),
    }

    # Runtime execution is on by default; it raises if the portable bindings
    # are unavailable (see run_portable_runtime).
    if not args.skip_runtime:
        runtime_out = run_portable_runtime(pte_path, (x, variance))
        save_tensor(out_dir / "runtime_out.pt", runtime_out)
        save_raw(out_dir / "runtime_out.raw", runtime_out)
        # The runtime output is compared against the quantized output, not
        # against the float reference.
        result["quant_vs_runtime_sqnr_db"] = sqnr_db(quantized_out, runtime_out)
        result["quant_vs_runtime_max_abs_diff"] = max_abs_diff(
            quantized_out, runtime_out
        )

    return result
| 357 | + |
| 358 | + |
def main() -> None:
    """Run the requested variants and write ``summary.json``.

    Parses the CLI, validates the QNN environment, builds one shared pair of
    sample inputs, runs each selected variant in turn (printing its result as
    JSON), and finally writes the list of all results to
    ``<output_dir>/summary.json``.
    """
    args = parse_args()
    ensure_qnn_env()

    output_root = args.output_dir
    output_root.mkdir(parents=True, exist_ok=True)

    x, variance = build_inputs(args.height, args.width, args.seed, args.variance)
    soc_model = get_soc_model(args.soc_model)
    quant_dtype = get_quant_dtype(args.quant_dtype)

    results = []
    for variant in variant_names(args.variant):
        print(f"Running variant: {variant}")
        outcome = run_variant(variant, args, x, variance, soc_model, quant_dtype)
        print(json.dumps(outcome, indent=2))
        results.append(outcome)

    # NOTE: sqnr_db can return +/-inf, which json.dumps serializes as the
    # non-standard tokens `Infinity` / `-Infinity`.
    summary_path = output_root / "summary.json"
    summary_text = json.dumps(results, indent=2) + "\n"
    summary_path.write_text(summary_text)
    print(f"Wrote summary to {summary_path}")


if __name__ == "__main__":
    main()
0 commit comments