|
| 1 | +# Copyright (c) Meta Platforms, Inc. and affiliates. |
| 2 | +# All rights reserved. |
| 3 | +# Copyright 2023-2026 Arm Limited and/or its affiliates. |
| 4 | +# |
| 5 | +# This source code is licensed under the BSD-style license found in the |
| 6 | +# LICENSE file in the root directory of this source tree. |
| 7 | + |
| 8 | +import argparse |
| 9 | +import copy |
| 10 | +import json |
| 11 | +import logging |
| 12 | +import os |
| 13 | +import sys |
| 14 | + |
| 15 | +from pathlib import Path |
| 16 | + |
| 17 | +# Add Executorch root to path so this script can be run from anywhere |
| 18 | +_EXECUTORCH_DIR = Path(__file__).resolve().parents[3] |
| 19 | +_EXECUTORCH_DIR_STR = str(_EXECUTORCH_DIR) |
| 20 | +if _EXECUTORCH_DIR_STR not in sys.path: |
| 21 | + sys.path.insert(0, _EXECUTORCH_DIR_STR) |
| 22 | + |
| 23 | +from typing import Any |
| 24 | + |
| 25 | +import torch |
| 26 | + |
| 27 | +from backends.arm.util.arm_model_evaluator import ( |
| 28 | + Evaluator, |
| 29 | + FileCompressionEvaluator, |
| 30 | + ImageNetEvaluator, |
| 31 | + NumericalModelEvaluator, |
| 32 | +) |
| 33 | +from examples.arm.aot_arm_compiler import ( |
| 34 | + CALIBRATION_MAX_SAMPLES, |
| 35 | + dump_delegation_info, |
| 36 | + get_model_and_inputs_from_name, |
| 37 | + load_calibration_samples, |
| 38 | + quantize_model, |
| 39 | + QuantMode, |
| 40 | +) |
| 41 | + |
| 42 | +from examples.models import MODEL_NAME_TO_MODEL |
| 43 | +from executorch.backends.arm.tosa import TosaSpecification |
| 44 | +from executorch.backends.arm.tosa.compile_spec import TosaCompileSpec |
| 45 | +from executorch.backends.arm.util._factory import create_partitioner |
| 46 | + |
| 47 | +from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower |
| 48 | +from torch.utils.data import DataLoader |
| 49 | + |
| 50 | + |
| 51 | +_EVALUATORS = [ |
| 52 | + "numerical", |
| 53 | + "imagenet", |
| 54 | +] |
| 55 | + |
| 56 | +_QUANT_MODES = [ |
| 57 | + "int8", |
| 58 | + "a16w8", |
| 59 | +] |
| 60 | + |
| 61 | +_DTYPE_MAP = { |
| 62 | + "fp32": torch.float32, |
| 63 | + "fp16": torch.float16, |
| 64 | + "bf16": torch.bfloat16, |
| 65 | +} |
| 66 | + |
| 67 | + |
def _get_args():
    """Parse and validate the command-line arguments.

    Also configures logging according to --debug and applies cross-argument
    validation (quantization/dtype exclusivity, evaluator names, dataset
    requirements, target validity).

    Returns:
        The validated argparse namespace. ``args.evaluators`` is normalized
        from a comma-separated string into a list of evaluator-name strings,
        and ``args.output`` defaults to a JSON path inside the intermediates
        directory.

    Raises:
        ValueError: If any cross-argument validation fails or --target is not
            a valid TOSA specification string.
    """
    parser = argparse.ArgumentParser(
        # NOTE: ArgumentParser's first positional parameter is `prog`, so the
        # description must be passed by keyword or it replaces the program
        # name in the usage/help output.
        description=(
            "Evaluate a model quantized and/or delegated for the Arm backend."
            " Evaluations include numerical comparison to the original model"
            " and/or top-1/top-5 accuracy if applicable."
        )
    )
    parser.add_argument(
        "-m",
        "--model_name",
        required=True,
        help="Model file .py/.pth/.pt or a model from examples/models."
        f" Available models from examples/models: {', '.join(MODEL_NAME_TO_MODEL.keys())}",
    )
    parser.add_argument(
        "-t",
        "--target",
        action="store",
        required=True,
        help=(
            "For Arm backend delegated models, pick the target."
            " Examples of valid targets: TOSA-1.0+INT, TOSA-1.0+FP+bf16"
        ),
    )
    parser.add_argument(
        "-q",
        "--quant_mode",
        required=False,
        default=None,
        choices=_QUANT_MODES,
        help="Quantize the model using the requested mode.",
    )
    parser.add_argument(
        "--calibration_data",
        required=False,
        default=None,
        help=(
            "Optional calibration data file or directory. If a directory is "
            "provided, up to 1000 samples are used for calibration. "
            "Supported files: Common image formats (e.g., .png or .jpg) if "
            "using imagenet evaluator, otherwise .pt/.pth files. If not "
            "provided, quantized models are calibrated on their example inputs."
        ),
    )
    parser.add_argument(
        "--no_delegate",
        action="store_false",
        dest="delegate",
        default=True,
        help=(
            "Disable delegation for cases where a quantized but non-delegated "
            "model is to be tested."
        ),
    )
    parser.add_argument(
        "-e",
        "--evaluators",
        required=True,
        help=(
            "Comma-separated list of evaluators to use. " f"Valid values: {_EVALUATORS}"
        ),
    )
    parser.add_argument(
        "--evaluation_dataset",
        required=False,
        default=None,
        help="Provide path to evaluation dataset directory. (only applicable for ImageNet evaluation).",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        required=False,
        default=1,
        help="Batch size to use for ImageNet evaluation. (only applicable for ImageNet evaluation).",
    )
    parser.add_argument(
        "-s",
        "--so_library",
        required=False,
        default=None,
        help="Path to .so library to load custom ops from before evaluation.",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Set the logging level to debug."
    )
    parser.add_argument(
        "--dtype",
        choices=sorted(_DTYPE_MAP.keys()),
        default=None,
        help="Cast the model to evaluate and its inputs to the given dtype.",
    )
    parser.add_argument(
        "-i",
        "--intermediates",
        action="store",
        required=True,
        help="Store intermediate output (like TOSA artifacts) at the specified directory.",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=False,
        default=None,
        help="Path to JSON file where evaluation metrics will be stored.",
    )
    args = parser.parse_args()

    LOGGING_FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
    logging_level = logging.DEBUG if args.debug else logging.WARNING
    logging.basicConfig(level=logging_level, format=LOGGING_FORMAT, force=True)

    if args.quant_mode is None and not args.delegate:
        raise ValueError(
            "The model to test must be either quantized or delegated (--quant_mode or --delegate)."
        )

    if args.calibration_data is not None and args.quant_mode is None:
        raise ValueError("--calibration_data requires --quant_mode to be enabled.")

    if args.quant_mode is not None and args.dtype is not None:
        raise ValueError("Cannot specify --dtype when --quant_mode is enabled.")

    # Normalize the comma-separated evaluator string into a list of names.
    evaluators: list[str] = [
        entry.strip() for entry in args.evaluators.split(",") if entry.strip()
    ]
    unknown = [entry for entry in evaluators if entry not in _EVALUATORS]
    if not evaluators:
        raise ValueError("At least one evaluator must be specified in --evaluators.")
    if unknown:
        raise ValueError(
            "Unknown evaluators in --evaluators: " f"{', '.join(sorted(set(unknown)))}"
        )
    args.evaluators = evaluators

    if "imagenet" in args.evaluators and args.evaluation_dataset is None:
        raise ValueError("Evaluation dataset must be provided for ImageNet evaluation.")

    # Default output path to intermediates folder with name based on target and extensions
    if args.output is None:
        args.output = os.path.join(args.intermediates, f"{args.target}_metrics.json")

    # Validate the target early so a malformed spec fails before any heavy work.
    try:
        TosaSpecification.create_from_string(args.target)
    except ValueError as e:
        raise ValueError(f"Invalid target format for --target: {e}")

    return args
| 214 | + |
| 215 | + |
def _get_compile_spec(args) -> TosaCompileSpec:
    """Build a TOSA compile spec for the target named in the CLI arguments.

    When an intermediates directory is given, the spec is configured to dump
    intermediate artifacts (e.g. TOSA files) there.
    """
    spec = TosaCompileSpec(TosaSpecification.create_from_string(args.target))
    if args.intermediates is not None:
        spec.dump_intermediate_artifacts_to(args.intermediates)
    return spec
| 224 | + |
| 225 | + |
def _build_imagenet_calibration_samples(
    calibration_dir: str, max_samples: int
) -> list[tuple[torch.Tensor, ...]]:
    """Collect up to ``max_samples`` single-image input tuples for calibration.

    Images are loaded from an ImageNet-style folder one at a time, without
    shuffling, and each is wrapped in a one-element tuple matching the model's
    positional-input convention.
    """
    dataset = ImageNetEvaluator.load_imagenet_folder(calibration_dir)
    loader = DataLoader(dataset, batch_size=1, shuffle=False)
    collected: list[tuple[torch.Tensor, ...]] = []
    for batch in loader:
        image = batch[0]
        collected.append((image,))
        if len(collected) >= max_samples:
            break
    return collected
| 237 | + |
| 238 | + |
def _evaluate(
    args, model_name, ref_model, eval_model, example_inputs
) -> dict[str, Any]:
    """Run the file-compression and user-requested evaluators; merge metrics.

    Returns a single dict combining each evaluator's metrics (later results
    overwrite earlier ones on key collisions).
    """
    active: list[Evaluator] = []
    eval_dtype = _DTYPE_MAP.get(args.dtype, None)

    # The compression-ratio evaluator runs whenever a TOSA artifact exists in
    # the intermediates directory.
    tosa_file = next(Path(args.intermediates).glob("*.tosa"), None)
    if tosa_file is not None:
        active.append(FileCompressionEvaluator(model_name, str(tosa_file)))
    else:
        logging.warning(
            f"No TOSA file found in {args.intermediates} for compression evaluation"
        )

    # Instantiate each user-specified evaluator.
    for name in args.evaluators:
        if name == "numerical":
            active.append(
                NumericalModelEvaluator(
                    model_name,
                    ref_model,
                    eval_model,
                    example_inputs,
                    eval_dtype=eval_dtype,
                )
            )
        elif name == "imagenet":
            active.append(
                ImageNetEvaluator(
                    model_name,
                    eval_model,
                    batch_size=args.batch_size,
                    validation_dataset_path=args.evaluation_dataset,
                    eval_dtype=eval_dtype,
                )
            )
        else:
            # _get_args already validated the names, so this is unreachable.
            raise AssertionError(f"Unknown evaluator {name}")

    # Run every evaluator and fold its results into one metrics dict.
    merged: dict[str, Any] = {}
    for evaluator in active:
        merged.update(evaluator.evaluate())

    return merged
| 285 | + |
| 286 | + |
| 287 | +def main() -> None: |
| 288 | + try: |
| 289 | + args = _get_args() |
| 290 | + except ValueError as e: |
| 291 | + logging.error(f"Argument error: {e}") |
| 292 | + sys.exit(1) |
| 293 | + |
| 294 | + # if we have custom ops, register them before processing the model |
| 295 | + if args.so_library is not None: |
| 296 | + logging.info(f"Loading custom ops from {args.so_library}") |
| 297 | + torch.ops.load_library(args.so_library) |
| 298 | + |
| 299 | + # Get the model and its example inputs |
| 300 | + original_model, example_inputs = get_model_and_inputs_from_name( |
| 301 | + args.model_name, None |
| 302 | + ) |
| 303 | + |
| 304 | + # Use original model as reference to compare against |
| 305 | + ref_model = original_model.eval() |
| 306 | + eval_model = ref_model |
| 307 | + eval_inputs = example_inputs |
| 308 | + |
| 309 | + # Cast model and inputs to eval_dtype if specified |
| 310 | + if args.dtype is not None: |
| 311 | + eval_dtype = _DTYPE_MAP[args.dtype] |
| 312 | + eval_model = copy.deepcopy(original_model).to(eval_dtype).eval() |
| 313 | + eval_inputs = tuple( |
| 314 | + inp.to(eval_dtype) if isinstance(inp, torch.Tensor) else inp |
| 315 | + for inp in example_inputs |
| 316 | + ) |
| 317 | + |
| 318 | + # Export the model |
| 319 | + exported_program = torch.export.export(eval_model, eval_inputs) |
| 320 | + |
| 321 | + model_name = os.path.basename(os.path.splitext(args.model_name)[0]) |
| 322 | + if args.intermediates: |
| 323 | + os.makedirs(args.intermediates, exist_ok=True) |
| 324 | + |
| 325 | + # We only support Python3.10 and above, so use a later pickle protocol |
| 326 | + torch.export.save( |
| 327 | + exported_program, |
| 328 | + f"{args.intermediates}/{model_name}_exported_program.pt2", |
| 329 | + pickle_protocol=5, |
| 330 | + ) |
| 331 | + |
| 332 | + compile_spec = _get_compile_spec(args) |
| 333 | + |
| 334 | + # Quantize the model if requested |
| 335 | + if args.quant_mode is not None: |
| 336 | + calibration_samples = None |
| 337 | + if ( |
| 338 | + "imagenet" in args.evaluators |
| 339 | + and args.calibration_data is not None |
| 340 | + and Path(args.calibration_data).is_dir() |
| 341 | + ): |
| 342 | + calibration_samples = _build_imagenet_calibration_samples( |
| 343 | + args.calibration_data, CALIBRATION_MAX_SAMPLES |
| 344 | + ) |
| 345 | + else: |
| 346 | + calibration_samples = load_calibration_samples( |
| 347 | + args.calibration_data, example_inputs |
| 348 | + ) |
| 349 | + |
| 350 | + match args.quant_mode: |
| 351 | + case "a16w8": |
| 352 | + quant_mode = QuantMode.A16W8 |
| 353 | + case "int8": |
| 354 | + quant_mode = QuantMode.INT8 |
| 355 | + case _: |
| 356 | + raise AssertionError(f"Unknown quantization mode: {args.quant_mode}") |
| 357 | + |
| 358 | + eval_model, exported_program = quantize_model( |
| 359 | + exported_program.module(), |
| 360 | + eval_inputs, |
| 361 | + compile_spec, |
| 362 | + model_name, |
| 363 | + True, |
| 364 | + quant_mode, |
| 365 | + calibration_samples, |
| 366 | + ) |
| 367 | + |
| 368 | + # Delegate the model to Arm backend if requested |
| 369 | + if args.delegate: |
| 370 | + partitioner = create_partitioner(compile_spec) |
| 371 | + edge = to_edge_transform_and_lower( |
| 372 | + exported_program, |
| 373 | + partitioner=[partitioner], |
| 374 | + compile_config=EdgeCompileConfig( |
| 375 | + _check_ir_validity=False, |
| 376 | + ), |
| 377 | + ) |
| 378 | + exported_program = edge.exported_program() |
| 379 | + eval_model = exported_program.module() |
| 380 | + |
| 381 | + dump_delegation_info(edge, args.intermediates) |
| 382 | + |
| 383 | + # Evaluate the model |
| 384 | + metrics = _evaluate(args, model_name, ref_model, eval_model, example_inputs) |
| 385 | + |
| 386 | + # Dump result as JSON |
| 387 | + output = {"name": model_name, "target": args.target, "metrics": metrics} |
| 388 | + with open(args.output, "w") as f: |
| 389 | + json.dump(output, f, indent=4) |
| 390 | + |
| 391 | + |
| 392 | +if __name__ == "__main__": |
| 393 | + main() |
0 commit comments