diff --git a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml index 99d14fa1f410..8b1b826696ff 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml +++ b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml @@ -3,7 +3,7 @@ name = "dnn-benchmarking" version = "0.1.0" description = "Benchmarking and validation tool for hipDNN graphs" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.12" license = {text = "MIT"} authors = [ {name = "Advanced Micro Devices, Inc."}, @@ -13,9 +13,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py deleted file mode 100644 index e986ad193822..000000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -"""A/B comparison CLI runner.""" - -import argparse -from pathlib import Path -from typing import Literal, Optional - -from ..common.exceptions import ExecutionError, GraphLoadError -from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig -from ..execution.ab_runner import ABRunner -from ..graph.loader import GraphLoader -from ..reporting.reporter import Reporter -from ..reporting.statistics import CombinedBenchmarkStats - - -def run_ab_benchmark( - config: BenchmarkConfig, - ab_config: ABTestConfig, - reporter: Reporter, - seed: Optional[int] = None, - gpu_backend: Literal["torch", "auto", "none"] = "auto", - validation_config: Optional[ValidationConfig] = None, -) -> int: - """Run A/B comparison workflow. - - Args: - config: Benchmark configuration. - ab_config: A/B test configuration. - reporter: Reporter instance for console output. - seed: Optional random seed for reproducibility. - gpu_backend: GPU timer backend to use (torch, auto, none). - validation_config: Optional validation configuration for reference checking. - - Returns: - Exit code (0 for success, 1 for error, 2 for comparison failure). - """ - - try: - ab_config.validate_paths() - - loader = GraphLoader() - graph_json = loader.load_json(config.graph_path) - loader.validate(graph_json) - - graph_name = loader.get_graph_name(graph_json) - - reporter.print_ab_header(config, ab_config, graph_name) - - runner = ABRunner( - graph_json, - config, - ab_config, - gpu_backend=gpu_backend, - validation_config=validation_config, - ) - result = runner.run(seed=seed) - - stats_a = CombinedBenchmarkStats.from_result(result.result_a) - stats_b = CombinedBenchmarkStats.from_result(result.result_b) - - reporter.print_ab_combined_stats( - stats_a, - stats_b, - result.init_time_a_ms, - result.init_time_b_ms, - ) - - reporter.print_ab_comparison( - result.passed, - result.max_abs_diff, - result.max_rel_diff, - ab_config.rtol, - ab_config.atol, - ) - - if validation_config is not None and validation_config.enabled: - reporter.print_ab_validation( - result.validation_a, - result.validation_b, - validation_config.rtol, - validation_config.atol, - ) - - reporter.print_footer() - - validation_passed = True - if result.validation_a is not None and not result.validation_a.passed: - validation_passed = False - if result.validation_b is not None and not result.validation_b.passed: - validation_passed = False - - return 0 if (result.passed and validation_passed) else 2 - - except GraphLoadError as e: - reporter.print_error(f"Graph load error: {e}") - return 1 - - except ExecutionError as e: - reporter.print_error(f"Execution error: {e}") - return 1 - - except ValueError as e: - reporter.print_error(f"Configuration error: {e}") - return 1 - - except Exception as e: - reporter.print_error(f"Unexpected error: {e}") - return 1 - - -def run_ab_cli(args: argparse.Namespace, graph_path: Path, reporter: Reporter) -> int: - """Validate A/B CLI args, build configs, and delegate to run_ab_benchmark.""" - - if args.AId is None or args.BId is None: - reporter.print_error( - "A/B testing requires both --AId and --BId to be specified" - ) - return 1 - - if args.engine: - reporter.print_error( - "--engine is not supported in A/B testing mode " - "(use --AId and --BId instead)" - ) - return 1 - - try: - config = BenchmarkConfig( - graph_path=graph_path, - warmup_iters=args.warmup, - benchmark_iters=args.iters, - engine_id=args.AId, - ) - except ValueError as e: - reporter.print_error(f"Configuration error: {e}") - return 1 - - try: - ab_config = ABTestConfig( - a_path=args.APath, - a_id=args.AId, - b_path=args.BPath, - b_id=args.BId, - rtol=args.rtol, - atol=args.atol, - ) - except ValueError as e: - reporter.print_error(f"A/B configuration error: {e}") - return 1 - - validation_config = None - if args.validate != "none": - try: - validation_config = ValidationConfig( - provider=args.validate, - rtol=args.rtol, - atol=args.atol, - ) - except ValueError as e: - reporter.print_error(f"Validation configuration error: {e}") - return 1 - - return run_ab_benchmark( - config, - ab_config, - reporter, - seed=args.seed, - gpu_backend="auto", - validation_config=validation_config, - ) diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py index 376808e0add6..fe27a69b2cee 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py @@ -24,7 +24,7 @@ from ..common.exceptions import GraphLoadError from ..config.benchmark_config import MetricsConfig, SuiteConfig -from ..execution.suite_runner import _run_single_provider_engine +from ..execution.suite_runner import run_single_provider_engine, set_plugin_path from ..graph.loader import GraphLoader @@ -49,9 +49,18 @@ def run_internal_profiling(args: argparse.Namespace) -> int: ) return 1 + plugin_path = None + if args.plugin_path: + if len(args.plugin_path) != 1: + print( + "internal-profiling-run: expected exactly one --plugin-path", + file=sys.stderr, + ) + return 1 + plugin_path = args.plugin_path[0] + try: - if args.plugin_path is not None: - hipdnn.set_engine_plugin_paths([str(args.plugin_path)]) + set_plugin_path(hipdnn, plugin_path) handle = hipdnn.Handle() except RuntimeError as e: print( @@ -74,11 +83,9 @@ def run_internal_profiling(args: argparse.Namespace) -> int: # the inner pass; the parent already collected basic metrics on the # timed pass. # - # `plugin_path` is forwarded so the child's SuiteConfig matches the - # parent's. hipdnn.set_engine_plugin_paths above is what actually - # loads the plugin today, but any future code that reads - # config.plugin_path from inside _run_single_provider_engine would - # otherwise silently see None in the child. + # `plugin_path` is forwarded so the child SuiteConfig matches the + # parent's selected engine/plugin row. The outer suite runner passes + # exactly one plugin path for this single-engine subprocess. suite_config = SuiteConfig( warmup_iters=args.warmup, benchmark_iters=args.iters, @@ -88,11 +95,11 @@ def run_internal_profiling(args: argparse.Namespace) -> int: reference_provider="none", verbose=False, metrics=MetricsConfig(tier="off"), - plugin_path=args.plugin_path, + plugin_paths=[plugin_path] if plugin_path is not None else None, ) try: - result = _run_single_provider_engine( + result = run_single_provider_engine( graph_path=graph_path, graph_json_str=json.dumps(graph_json), graph_name=graph_json.get("name", graph_path.stem), @@ -101,6 +108,7 @@ def run_internal_profiling(args: argparse.Namespace) -> int: handle=handle, provider="profiling-inner", engine_id=engine_id, + plugin_path=plugin_path, ref_provider=None, validation_requested=False, graph_json=graph_json, diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py index 0f0b4f12799c..28f1f63d5cff 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py @@ -26,7 +26,6 @@ from ..common.exceptions import GraphLoadError from ..reporting.reporter import Reporter -from .ab_runner_cli import run_ab_cli from .internal_profiling import run_internal_profiling from .parser import create_parser from .pytorch_runner_cli import run_pytorch_cli @@ -82,15 +81,7 @@ def main() -> int: return 1 try: - if args.AId is not None or args.BId is not None: - if len(resolved_files) > 1: - reporter.print_error( - "A/B testing requires a single graph file, not a glob pattern" - ) - return 1 - return run_ab_cli(args, Path(resolved_files[0]), reporter) - - elif args.backend == "pytorch": + if args.backend == "pytorch": if len(resolved_files) > 1: reporter.print_error( "Suite mode is not supported with --backend pytorch" diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py index c90e91d64716..4ee19973866b 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py @@ -13,14 +13,16 @@ def _parse_engine_list(s: str) -> List[int]: Engine IDs are deterministic FNV-1a hashes of the engine name and may be negative when interpreted as signed int64, so we accept any int. - Duplicates are removed while preserving first-seen order. + Duplicate IDs are preserved because each comma-delimited entry is an + ordered execution selection; this allows comparing the same engine ID + from different plugin paths. Examples: "1" -> [1] "1,2,3" -> [1, 2, 3] "1, 2" -> [1, 2] - "1,1,2" -> [1, 2] - "3,1,3,2" -> [3, 1, 2] + "1,1,2" -> [1, 1, 2] + "3,1,3,2" -> [3, 1, 3, 2] "-4567890123456789012" -> [-4567890123456789012] """ parts = [p.strip() for p in s.split(",")] @@ -31,14 +33,16 @@ def _parse_engine_list(s: str) -> List[int]: ids = [int(p) for p in parts] except ValueError: raise argparse.ArgumentTypeError(f"--engine expects integer ID(s), got {s!r}") - # Deduplicate while preserving first-seen order - seen: set = set() - deduped: List[int] = [] - for i in ids: - if i not in seen: - seen.add(i) - deduped.append(i) - return deduped + return ids + + +def _parse_plugin_path_list(s: str) -> List[Path]: + """Parse --plugin-path as a comma-separated list of plugin directories.""" + parts = [p.strip() for p in s.split(",")] + parts = [p for p in parts if p] + if not parts: + raise argparse.ArgumentTypeError("--plugin-path requires at least one path") + return [Path(p) for p in parts] def create_parser() -> argparse.ArgumentParser: @@ -61,7 +65,7 @@ def create_parser() -> argparse.ArgumentParser: dnn-benchmark --graph ./graphs/conv1_fwd.json --warmup 20 --iters 200 dnn-benchmark -g ./graphs/conv1_fwd.json -e 1 dnn-benchmark -g ./graphs/conv1_fwd.json -v # verbose per-engine output - dnn-benchmark -g ./graphs/conv1_fwd.json -e 1,2 # compare engines 1 and 2 + dnn-benchmark -g ./graphs/conv1_fwd.json -e 1,2 PyTorch Backend (GPU via PyTorch): dnn-benchmark -g ./graph.json --backend pytorch @@ -71,9 +75,13 @@ def create_parser() -> argparse.ArgumentParser: dnn-benchmark -g ./graph.json --validate pytorch dnn-benchmark -g ./graph.json --validate pytorch --rtol 1e-3 -A/B Testing: - dnn-benchmark -g ./graph.json --AId 1 --BId 2 - dnn-benchmark -g ./graph.json --APath /path/pluginA --AId 1 --BPath /path/pluginB --BId 2 +Engine Comparison: + dnn-benchmark -g ./graph.json --engine 1,2,3 + dnn-benchmark -g ./graph.json --engine 1,2 --plugin-path /path/pluginA,/path/pluginB + +Engine IDs: + hipdnn_list_engines --plugin-dir /path/to/hipdnn_plugins/engines + (shipped with hipDNN tools, e.g. /opt/rocm/bin/hipdnn_list_engines) Suite Mode (multiple graphs): dnn-benchmark -g graphs/ # all .json/.tar.gz files in directory @@ -166,38 +174,8 @@ def create_parser() -> argparse.ArgumentParser: "(default: summary table)", ) - # A/B Testing arguments - ab_group = parser.add_argument_group("A/B Testing") - ab_group.add_argument( - "--APath", - type=Path, - default=None, - metavar="PATH", - help="Plugin path for configuration A (default: use system default)", - ) - ab_group.add_argument( - "--AId", - type=int, - default=None, - metavar="ID", - help="Engine ID for configuration A", - ) - ab_group.add_argument( - "--BPath", - type=Path, - default=None, - metavar="PATH", - help="Plugin path for configuration B (default: use system default)", - ) - ab_group.add_argument( - "--BId", - type=int, - default=None, - metavar="ID", - help="Engine ID for configuration B", - ) - # Comparison tolerances (used by A/B testing, validation, and suite mode) - comparison_group = parser.add_argument_group("Comparison") + # Reference comparison tolerances + comparison_group = parser.add_argument_group("Reference Comparison") comparison_group.add_argument( "--rtol", type=float, @@ -229,10 +207,14 @@ def create_parser() -> argparse.ArgumentParser: suite_group = parser.add_argument_group("Suite Options") suite_group.add_argument( "--plugin-path", - type=Path, + type=_parse_plugin_path_list, default=None, - metavar="DIR", - help="Path to directory containing hipDNN engine plugin .so files", + metavar="PATHS", + help=( + "Directory containing hipDNN engine plugin .so files, or a " + "comma-separated list matching --engine order. A single path is " + "shared by all selected engines." + ), ) # Metrics options diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py index 35c14ec24f30..bb3746bb9e76 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py @@ -9,7 +9,7 @@ from ..common.exceptions import ExecutionError, GraphLoadError from ..config.benchmark_config import MetricsConfig, SuiteConfig -from ..execution.suite_runner import run_graph_all_providers +from ..execution.suite_runner import run_graph_all_providers, set_plugin_path from ..graph.loader import GraphLoader from ..reporting.reporter import Reporter from ..reporting.suite_results import ( @@ -61,7 +61,6 @@ def run_suite_benchmark( graph_paths: List[Path], config: SuiteConfig, output_path: Optional[Path], - plugin_path: Optional[Path], reporter: Reporter, tarball_source: Optional[str] = None, ) -> int: @@ -71,7 +70,6 @@ def run_suite_benchmark( graph_paths: List of resolved graph file paths to benchmark. config: Suite configuration. output_path: Optional path to export results as JSON. - plugin_path: Optional path to plugin .so directory. reporter: Reporter instance for console output. tarball_source: Optional tarball source path for display. @@ -105,9 +103,14 @@ def run_suite_benchmark( try: import hipdnn_frontend as hipdnn - if plugin_path is not None: - hipdnn.set_engine_plugin_paths([str(plugin_path)]) - handle = hipdnn.Handle() + plugin_paths = config.plugin_paths + per_engine_plugin_paths = plugin_paths is not None and len(plugin_paths) > 1 + + if not per_engine_plugin_paths: + set_plugin_path(hipdnn, config.plugin_path) + handle = hipdnn.Handle() + else: + handle = None except ImportError: reporter.print_hipdnn_init_newline() reporter.print_error( @@ -131,6 +134,8 @@ def run_suite_benchmark( reporter.print_no_engines_applicable() if config.verbose: reporter.print_verbose_graph_result(gr, config) + else: + reporter.print_graph_result_table(gr) graph_results.append(gr) suite_result = SuiteResult.from_graph_results(graph_results, total_graphs=total) @@ -179,6 +184,7 @@ def run_suite_cli( "source requested (--pmc, --emit-trace, --perf, " "--roofline); the directory will not be written to" ) + plugin_paths = args.plugin_path config = SuiteConfig( warmup_iters=args.warmup, benchmark_iters=args.iters, @@ -190,7 +196,7 @@ def run_suite_cli( reference_provider=args.validate, verbose=args.verbose, metrics=metrics_config, - plugin_path=args.plugin_path, + plugin_paths=plugin_paths, ) except ValueError as e: reporter.print_error(f"Suite configuration error: {e}") @@ -200,7 +206,6 @@ def run_suite_cli( graph_paths=graph_paths, config=config, output_path=args.output, - plugin_path=args.plugin_path, reporter=reporter, tarball_source=tarball_source, ) diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py index 1e97d877f697..021519940bd9 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py @@ -3,6 +3,18 @@ """Configuration module for dnn-benchmarking.""" -from .benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig +from .benchmark_config import ( + BenchmarkConfig, + EngineSelection, + MetricsConfig, + SuiteConfig, + ValidationConfig, +) -__all__ = ["ABTestConfig", "BenchmarkConfig", "ValidationConfig"] +__all__ = [ + "BenchmarkConfig", + "EngineSelection", + "MetricsConfig", + "SuiteConfig", + "ValidationConfig", +] diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py index f98f0d42a873..c430b3b61fd9 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py @@ -41,52 +41,6 @@ def __post_init__(self) -> None: raise ValueError("benchmark_iters must be positive") -@dataclass -class ABTestConfig: - """Configuration for A/B testing mode. - - Attributes: - a_path: Plugin path for configuration A (None = default). - a_id: Engine ID for configuration A. - b_path: Plugin path for configuration B (None = default). - b_id: Engine ID for configuration B. - rtol: Relative tolerance for np.allclose comparison. - atol: Absolute tolerance for np.allclose comparison. - """ - - a_path: Optional[Path] = None - a_id: int = 1 - b_path: Optional[Path] = None - b_id: int = 1 - rtol: float = 1e-5 - atol: float = 1e-8 - - def __post_init__(self) -> None: - """Validate configuration values.""" - if isinstance(self.a_path, str): - self.a_path = Path(self.a_path) - if isinstance(self.b_path, str): - self.b_path = Path(self.b_path) - - # a_id / b_id are FNV-1a engine ID hashes that may be negative when - # interpreted as signed int64; do not bound-check them. - if self.rtol < 0: - raise ValueError("rtol must be non-negative") - if self.atol < 0: - raise ValueError("atol must be non-negative") - - def validate_paths(self) -> None: - """Validate that plugin paths exist if specified. - - Raises: - ValueError: If a specified path does not exist. - """ - if self.a_path is not None and not self.a_path.exists(): - raise ValueError(f"Plugin path A does not exist: {self.a_path}") - if self.b_path is not None and not self.b_path.exists(): - raise ValueError(f"Plugin path B does not exist: {self.b_path}") - - @dataclass class ValidationConfig: """Configuration for reference validation. @@ -266,6 +220,19 @@ def extra_runs_per_engine(self) -> int: ) +@dataclass(frozen=True) +class EngineSelection: + """One ordered engine execution selection. + + The plugin path is attached to the selection row rather than looked up by + engine ID so repeated engine IDs can be benchmarked against different + plugin builds. + """ + + engine_id: int + plugin_path: Optional[Path] = None + + @dataclass class SuiteConfig: """Configuration for suite execution mode. @@ -277,7 +244,7 @@ class SuiteConfig: warmup_iters: Number of warmup iterations per provider/engine. benchmark_iters: Number of benchmark iterations for timing. seed: Optional random seed for reproducible inputs. - engine_filter: If set, only iterate engine IDs in this list. + engine_filter: If set, ordered engine selections to run. rtol: Relative tolerance for correctness comparison. atol: Absolute tolerance for correctness comparison. gpu_backend: GPU timer backend to use. @@ -297,10 +264,7 @@ class SuiteConfig: reference_provider: str = "none" verbose: bool = False metrics: MetricsConfig = field(default_factory=MetricsConfig) - # Forwarded to the orchestrator's inner subprocess so the child - # picks up the same plugin .so directory the parent loaded. Not used - # outside of the opt-in profiling path. - plugin_path: Optional[Path] = None + plugin_paths: Optional[List[Path]] = None def __post_init__(self) -> None: """Validate configuration values.""" @@ -316,6 +280,20 @@ def __post_init__(self) -> None: if len(self.engine_filter) == 0: raise ValueError("engine_filter must be non-empty when set") # engine IDs are FNV-1a hashes -- may be negative as signed int64. + if self.plugin_paths is not None: + if len(self.plugin_paths) == 0: + raise ValueError("plugin_paths must be non-empty when set") + self.plugin_paths = [Path(p) for p in self.plugin_paths] + + if len(self.plugin_paths) > 1: + if self.engine_filter is None: + raise ValueError( + "--plugin-path with multiple entries requires --engine" + ) + if len(self.plugin_paths) != len(self.engine_filter): + raise ValueError( + "--plugin-path entry count must be 1 or match --engine count" + ) valid_gpu_backends = {"torch", "auto", "none"} if self.gpu_backend not in valid_gpu_backends: raise ValueError( @@ -328,3 +306,38 @@ def __post_init__(self) -> None: f"Invalid reference_provider: '{self.reference_provider}'. " f"Valid options: {valid_reference_providers}" ) + + @property + def plugin_path(self) -> Optional[Path]: + """Return the shared plugin path when exactly one path is configured.""" + if self.plugin_paths is None or len(self.plugin_paths) != 1: + return None + return self.plugin_paths[0] + + def engine_selections_for(self, engine_ids: List[int]) -> List[EngineSelection]: + """Return ordered engine selections for the provided engine IDs. + + ``engine_ids`` is either the explicit ``--engine`` list, where duplicate + IDs are meaningful selections, or the backend-discovered engine list. + Multiple plugin paths are only valid with an explicit engine list and + are associated positionally with that list. + """ + if self.plugin_paths is None: + return [EngineSelection(engine_id) for engine_id in engine_ids] + + if len(self.plugin_paths) == 1: + plugin_path = self.plugin_paths[0] + return [ + EngineSelection(engine_id, plugin_path=plugin_path) + for engine_id in engine_ids + ] + + if self.engine_filter is None or len(engine_ids) != len(self.plugin_paths): + raise ValueError( + "--plugin-path entry count must be 1 or match --engine count" + ) + + return [ + EngineSelection(engine_id, plugin_path=plugin_path) + for engine_id, plugin_path in zip(engine_ids, self.plugin_paths) + ] diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py index 712504d7af47..fb962112eaad 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py @@ -3,7 +3,6 @@ """Execution module for dnn-benchmarking.""" -from .ab_runner import ABRunner, ABTestResult from .buffer_manager import BufferManager from .executor import Executor @@ -23,8 +22,6 @@ ) __all__ = [ - "ABRunner", - "ABTestResult", "BufferManager", "Executor", "GpuTimer", diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py deleted file mode 100644 index e23d46323fb8..000000000000 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py +++ /dev/null @@ -1,325 +0,0 @@ -# Copyright © Advanced Micro Devices, Inc., or its affiliates. -# SPDX-License-Identifier: MIT - -"""A/B testing runner for comparing plugin/engine configurations.""" - -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Literal, Optional, Tuple - -import numpy as np - -from ..common.exceptions import ExecutionError -from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig -from ..graph.loader import GraphLoader -from ..graph.tensor_info import TensorInfo -from ..reporting.statistics import BenchmarkResult -from ..validation.comparison import ArrayComparator, ComparisonResult -from .buffer_manager import BufferManager -from .executor import Executor - - -@dataclass -class ValidationResult: - """Result of reference validation for a single configuration. - - Attributes: - passed: Whether validation passed. - max_abs_diff: Maximum absolute difference from reference. - max_rel_diff: Maximum relative difference from reference. - provider_name: Name of the reference provider used. - """ - - passed: bool - max_abs_diff: float - max_rel_diff: float - provider_name: str - - -@dataclass -class ABTestResult: - """Result of A/B test comparison. - - Attributes: - result_a: Full benchmark result for configuration A (includes E2E and kernel timings). - result_b: Full benchmark result for configuration B (includes E2E and kernel timings). - init_time_a_ms: Graph initialization time for A in milliseconds. - init_time_b_ms: Graph initialization time for B in milliseconds. - passed: Whether outputs match within tolerance. - max_abs_diff: Maximum absolute difference between outputs. - max_rel_diff: Maximum relative difference between outputs. - validation_a: Optional reference validation result for configuration A. - validation_b: Optional reference validation result for configuration B. - """ - - result_a: BenchmarkResult - result_b: BenchmarkResult - init_time_a_ms: float - init_time_b_ms: float - passed: bool - max_abs_diff: float - max_rel_diff: float - validation_a: Optional[ValidationResult] = None - validation_b: Optional[ValidationResult] = None - - -class ABRunner: - """Runs A/B comparison between two plugin/engine configurations. - - This class handles: - - Setting plugin paths for each configuration - - Executing the same graph with different engines - - Comparing outputs using np.allclose - - Collecting timing statistics for both configurations (E2E and kernel) - - Optional reference validation for each configuration - """ - - def __init__( - self, - graph_json: Dict[str, Any], - config: BenchmarkConfig, - ab_config: ABTestConfig, - gpu_backend: Literal["torch", "auto", "none"] = "auto", - validation_config: Optional[ValidationConfig] = None, - ) -> None: - """Initialize A/B runner. - - Args: - graph_json: The graph as a parsed JSON dictionary. - config: Benchmark configuration (warmup/iters). - ab_config: A/B test configuration (paths, engine IDs, tolerances). - gpu_backend: GPU timer backend to use (torch, auto, none). - validation_config: Optional validation configuration for reference checking. - """ - self._graph_json = graph_json - self._config = config - self._ab_config = ab_config - self._gpu_backend = gpu_backend - self._validation_config = validation_config - - def _set_plugin_path(self, plugin_path: Optional[Path]) -> None: - """Set plugin path using hipdnn_frontend API. - - Args: - plugin_path: Path to plugin directory, or None for default. - """ - import hipdnn_frontend as hipdnn - - if plugin_path is not None: - # Use ABSOLUTE mode to ensure only this plugin is used - hipdnn.set_engine_plugin_paths( - [str(plugin_path)], hipdnn.PluginLoadingMode.ABSOLUTE - ) - - def _run_single( - self, - plugin_path: Optional[Path], - engine_id: int, - buffer_manager: BufferManager, - config_name: str = "", - ) -> Tuple[Dict[int, np.ndarray], BenchmarkResult, float]: - """Execute graph with specific plugin/engine configuration. - - Args: - plugin_path: Path to plugin directory, or None for default. - engine_id: Engine ID to use. - buffer_manager: Buffer manager with allocated tensors. - config_name: Name for this configuration (e.g., "A" or "B"). - - Returns: - Tuple of (outputs_dict, benchmark_result, init_time_ms) where - outputs_dict maps tensor UID to numpy array for all output tensors. - """ - import hipdnn_frontend as hipdnn - - # Set plugin path before creating Handle - self._set_plugin_path(plugin_path) - - handle = hipdnn.Handle() - executor = Executor( - json.dumps(self._graph_json), self._config, gpu_backend=self._gpu_backend - ) - executor.prepare(handle, engine_id=engine_id) - init_time_ms = executor.init_time_ms - - variant_pack = buffer_manager.create_variant_pack() - executor.warmup(handle, variant_pack) - result = executor.benchmark(handle, variant_pack, graph_name=config_name) - - # Get all output data - copy to avoid overwriting - output_tensors = buffer_manager.get_output_tensors() - if not output_tensors: - raise ExecutionError("No output tensors found in graph") - - outputs: Dict[int, np.ndarray] = {} - for tensor in output_tensors: - data = buffer_manager.get_output_data(tensor.uid) - if data is None: - raise ExecutionError( - f"Failed to retrieve output data for tensor uid={tensor.uid}" - ) - outputs[tensor.uid] = data.copy() - - return outputs, result, init_time_ms - - def _validate_output( - self, - outputs: Dict[int, np.ndarray], - tensor_infos: List[TensorInfo], - buffer_manager: BufferManager, - config_name: str, - ) -> Optional[ValidationResult]: - """Validate output against reference provider. - - Args: - outputs: Dict mapping tensor UID to output data from execution. - tensor_infos: List of tensor info objects. - buffer_manager: Buffer manager with input data. - config_name: Name of the configuration being validated. - - Returns: - ValidationResult if validation was performed, None otherwise. - """ - if self._validation_config is None or not self._validation_config.enabled: - return None - - from ..validation import ReferenceProviderRegistry - - try: - provider = ReferenceProviderRegistry.get_provider( - self._validation_config.provider - ) - - if not provider.is_available(): - return None - - if not provider.supports_graph(self._graph_json): - return None - - # Collect input data - input_data: Dict[int, np.ndarray] = {} - for tensor_info in tensor_infos: - if not tensor_info.is_virtual and not tensor_info.is_output: - data = buffer_manager.get_input_data(tensor_info.uid) - if data is not None: - input_data[tensor_info.uid] = data - - # Compute reference - reference_outputs = provider.compute_reference(self._graph_json, input_data) - - # Compare all output tensors, track worst-case diffs - comparator = ArrayComparator( - rtol=self._validation_config.rtol, atol=self._validation_config.atol - ) - all_passed = True - worst_abs = 0.0 - worst_rel = 0.0 - - for uid, output_data in outputs.items(): - ref_output = reference_outputs.get(uid) - if ref_output is None: - continue - comparison = comparator.compare( - output_data, - ref_output.data, - config_name, - self._validation_config.provider, - ) - if not comparison.passed: - all_passed = False - worst_abs = max(worst_abs, comparison.max_abs_diff) - worst_rel = max(worst_rel, comparison.max_rel_diff) - - return ValidationResult( - passed=all_passed, - max_abs_diff=worst_abs, - max_rel_diff=worst_rel, - provider_name=self._validation_config.provider, - ) - - except (ValueError, NotImplementedError, ImportError): - return None - - def run(self, seed: Optional[int] = 42) -> ABTestResult: - """Run A/B comparison. - - Args: - seed: Random seed for reproducible input data. - - Returns: - ABTestResult with full benchmark results and comparison. - """ - loader = GraphLoader() - tensor_infos = loader.extract_tensor_info(self._graph_json) - - validation_a: Optional[ValidationResult] = None - validation_b: Optional[ValidationResult] = None - - with BufferManager(tensor_infos) as buffer_manager: - buffer_manager.allocate_all() - buffer_manager.fill_inputs_random(seed=seed) - - # Run configuration A - buffer_manager.zero_outputs() - outputs_a, result_a, init_a = self._run_single( - self._ab_config.a_path, self._ab_config.a_id, buffer_manager, "A" - ) - - # Validate A if configured - validation_a = self._validate_output( - outputs_a, tensor_infos, buffer_manager, "A" - ) - - # Synchronize GPU to ensure Config A's work is complete before B starts - # This prevents stream state contamination in timing measurements - try: - import torch - - if torch.cuda.is_available(): - torch.cuda.synchronize() - except ImportError: - pass - - # Run configuration B (same inputs) - buffer_manager.zero_outputs() - outputs_b, result_b, init_b = self._run_single( - self._ab_config.b_path, self._ab_config.b_id, buffer_manager, "B" - ) - - # Validate B if configured - validation_b = self._validate_output( - outputs_b, tensor_infos, buffer_manager, "B" - ) - - # Compare all output tensors, track worst-case diffs across all outputs - comparator = ArrayComparator( - rtol=self._ab_config.rtol, atol=self._ab_config.atol - ) - all_passed = True - worst_abs = 0.0 - worst_rel = 0.0 - - for uid in outputs_a: - if uid not in outputs_b: - all_passed = False - worst_abs = float("inf") - worst_rel = float("inf") - continue - comparison = comparator.compare(outputs_a[uid], outputs_b[uid], "A", "B") - if not comparison.passed: - all_passed = False - worst_abs = max(worst_abs, comparison.max_abs_diff) - worst_rel = max(worst_rel, comparison.max_rel_diff) - - return ABTestResult( - result_a=result_a, - result_b=result_b, - init_time_a_ms=init_a, - init_time_b_ms=init_b, - passed=all_passed, - max_abs_diff=worst_abs, - max_rel_diff=worst_rel, - validation_a=validation_a, - validation_b=validation_b, - ) diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py index 4325852b432a..a246fee5f693 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py @@ -3,9 +3,9 @@ """Suite runner for per-graph engine iteration with granular timing. -Iterates the engine IDs discovered for a graph via -``Graph.get_ranked_engine_ids`` (real runtime discovery, no hardcoded engine -lists). For each engine, captures separated CPU build time, GPU kernel time, +Uses explicit ``--engine`` IDs in caller order when provided; otherwise +discovers ranked engine IDs for the graph via ``Graph.get_ranked_engine_ids``. +For each engine, captures separated CPU build time, GPU kernel time, and E2E wall-clock time. Performs correctness validation by comparing GPU output against a reference provider via ArrayComparator. """ @@ -78,6 +78,39 @@ def _resolve_engine_name(engine_id: int) -> str: return f"engine_{engine_id:#x}" +def set_plugin_path( + hipdnn: Any, plugin_path: Optional[Path], loading_mode: Optional[Any] = None +) -> None: + """Set the process-wide hipDNN plugin search path for the next handle.""" + if plugin_path is None: + return + paths = [str(plugin_path)] + if loading_mode is None: + hipdnn.set_engine_plugin_paths(paths) + else: + hipdnn.set_engine_plugin_paths(paths, loading_mode) + + +def _engine_setup_error_result( + provider: str, + engine_id: int, + plugin_path: Optional[Path], + config: SuiteConfig, + error_message: str, +) -> ProviderEngineResult: + """Build a per-engine error row for plugin-path/handle setup failures.""" + return ProviderEngineResult( + provider=provider, + engine_id=engine_id, + status="error", + plugin_path=str(plugin_path) if plugin_path is not None else None, + error_message=error_message, + correctness=CorrectnessResult.failed( + rtol=config.rtol, atol=config.atol, error_message=error_message + ), + ) + + def _get_reference_provider( config: SuiteConfig, graph_json: Dict[str, Any] ) -> Optional[ReferenceProvider]: @@ -251,59 +284,64 @@ def run_graph_all_providers( validation_requested = config.reference_provider != "none" - # Discover engines via real backend heuristics. A discovery failure - # is a graph-level error (record it and stop iterating engines), but - # "no engine configurations available" / "not supported" messages are - # really an unsupported-graph signal -- record as skipped so the - # suite exit code stays 0 when nothing is wrong, just nothing to run. - discovery_config = BenchmarkConfig( - graph_path=graph_path, - warmup_iters=config.warmup_iters, - benchmark_iters=config.benchmark_iters, - ) - try: - discovery_executor = Executor( - graph_json_str=graph_json_str, - config=discovery_config, - gpu_backend=config.gpu_backend, - ) - engine_ids = discovery_executor.discover_engines(handle) - except UnsupportedGraphError as e: - return GraphResult( - graph_name=graph_name, - graph_path=str(graph_path), - results=[ - ProviderEngineResult( - provider="unknown", - engine_id=0, - status="skipped", - skip_reason=str(e), - correctness=CorrectnessResult.failed( - rtol=config.rtol, atol=config.atol, error_message=str(e) - ), - ) - ], - ) - except (ExecutionError, RuntimeError) as e: - msg = str(e) - return GraphResult( - graph_name=graph_name, - graph_path=str(graph_path), - results=[ - ProviderEngineResult( - provider="unknown", - engine_id=0, - status="error", - error_message=f"Engine discovery failed: {msg}", - correctness=CorrectnessResult.failed( - rtol=config.rtol, atol=config.atol, error_message=msg - ), - ) - ], + if config.engine_filter is not None: + # Explicit --engine is a selection, not a post-discovery filter. Keep the + # caller's order so per-engine plugin paths are deterministic. + engine_ids = list(config.engine_filter) + else: + # Discover engines via real backend heuristics. A discovery failure is a + # graph-level error (record it and stop iterating engines), but "no + # engine configurations available" / "not supported" messages are + # really an unsupported-graph signal. + discovery_config = BenchmarkConfig( + graph_path=graph_path, + warmup_iters=config.warmup_iters, + benchmark_iters=config.benchmark_iters, ) + try: + if handle is None: + import hipdnn_frontend as hipdnn - if config.engine_filter is not None: - engine_ids = [e for e in engine_ids if e in config.engine_filter] + handle = hipdnn.Handle() + discovery_executor = Executor( + graph_json_str=graph_json_str, + config=discovery_config, + gpu_backend=config.gpu_backend, + ) + engine_ids = discovery_executor.discover_engines(handle) + except UnsupportedGraphError as e: + return GraphResult( + graph_name=graph_name, + graph_path=str(graph_path), + results=[ + ProviderEngineResult( + provider="unknown", + engine_id=0, + status="skipped", + skip_reason=str(e), + correctness=CorrectnessResult.failed( + rtol=config.rtol, atol=config.atol, error_message=str(e) + ), + ) + ], + ) + except (ExecutionError, RuntimeError) as e: + msg = str(e) + return GraphResult( + graph_name=graph_name, + graph_path=str(graph_path), + results=[ + ProviderEngineResult( + provider="unknown", + engine_id=0, + status="error", + error_message=f"Engine discovery failed: {msg}", + correctness=CorrectnessResult.failed( + rtol=config.rtol, atol=config.atol, error_message=msg + ), + ) + ], + ) if not engine_ids: return GraphResult( @@ -317,12 +355,12 @@ def run_graph_all_providers( error_message=( "No engines discovered for graph" if config.engine_filter is None - else "No discovered engines matched --engine filter" + else "No engines selected for graph" ), ) ], ) - + engine_selections = config.engine_selections_for(engine_ids) ref_provider = _get_reference_provider(config, graph_json) # Compute analytical metrics once per graph — they're a function of @@ -343,20 +381,50 @@ def run_graph_all_providers( warn_once("analytical", f"compute_io_bytes failed for {graph_name}: {e}") pe_results: List[ProviderEngineResult] = [] - for engine_id in engine_ids: + for selection in engine_selections: + engine_id = selection.engine_id + engine_plugin_path = selection.plugin_path engine_name = _resolve_engine_name(engine_id) - if reporter is not None: - reporter.print_engine_start(engine_name) + engine_handle = handle with Timer() as t: - pe_result = _run_single_provider_engine( + if engine_handle is None: + try: + import hipdnn_frontend as hipdnn + + set_plugin_path( + hipdnn, + engine_plugin_path, + hipdnn.PluginLoadingMode.ABSOLUTE, + ) + engine_handle = hipdnn.Handle() + except (ImportError, RuntimeError, ValueError, OSError) as e: + pe_result = _engine_setup_error_result( + provider=engine_name, + engine_id=engine_id, + plugin_path=engine_plugin_path, + config=config, + error_message=str(e), + ) + pe_result.elapsed_time_ms = t.elapsed_ms + if reporter is not None: + reporter.print_engine_start(engine_name) + reporter.print_engine_result(pe_result) + pe_results.append(pe_result) + continue + + if reporter is not None: + reporter.print_engine_start(engine_name) + + pe_result = run_single_provider_engine( graph_path=graph_path, graph_json_str=graph_json_str, graph_name=graph_name, tensor_infos=tensor_infos, config=config, - handle=handle, + handle=engine_handle, provider=engine_name, engine_id=engine_id, + plugin_path=engine_plugin_path, ref_provider=ref_provider, validation_requested=validation_requested, graph_json=graph_json, @@ -365,6 +433,8 @@ def run_graph_all_providers( analytical_io_bytes=analytical_io_bytes, ) pe_result.elapsed_time_ms = t.elapsed_ms + if engine_plugin_path is not None: + pe_result.plugin_path = str(engine_plugin_path) if reporter is not None: reporter.print_engine_result(pe_result) pe_results.append(pe_result) @@ -388,7 +458,7 @@ def _collect_basic_metrics_post_loop( """Populate the basic always-on metric fields on ``result``. Called once after the timed loop when ``metrics.tier == "basic"``. - Pulled out of :func:`_run_single_provider_engine` to keep that + Pulled out of :func:`run_single_provider_engine` to keep that function focused on the timed loop itself; the basic-tier book keeping is otherwise just a long sequence of conditionals on intermediate results. @@ -437,7 +507,7 @@ def _collect_basic_metrics_post_loop( warn_once("gpu_smi", f"vram snapshot failed: {e}") -def _run_single_provider_engine( +def run_single_provider_engine( graph_path: Path, graph_json_str: str, graph_name: str, @@ -446,6 +516,7 @@ def _run_single_provider_engine( handle: Any, provider: str, engine_id: int, + plugin_path: Optional[Path], ref_provider: Optional[ReferenceProvider], validation_requested: bool, graph_json: Dict[str, Any], @@ -586,7 +657,7 @@ def _run_single_provider_engine( warmup_iters=config.warmup_iters, benchmark_iters=config.benchmark_iters, metrics_config=config.metrics, - plugin_path=config.plugin_path, + plugin_path=plugin_path, ) if extra: result.extra_metrics = extra diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py index 9a20c77feb53..321b1ccfc46f 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py @@ -41,7 +41,9 @@ def extract_tarball(tarball_path: str) -> Tuple[tempfile.TemporaryDirectory, Lis tmpdir = tempfile.TemporaryDirectory(prefix="dnn_benchmarking_") try: with tarfile.open(tarball_path) as tf: - json_members = [m for m in tf.getmembers() if m.name.endswith(".json")] + json_members = [ + m for m in tf.getmembers() if m.name.endswith(".json") and m.isfile() + ] if not json_members: raise GraphLoadError(f"No .json files found in tarball: {tarball_path}") tf.extractall(path=tmpdir.name, members=json_members, filter="data") diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py index 53633ab976ed..698c07c9be71 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py @@ -5,9 +5,9 @@ import sys from pathlib import Path -from typing import Any, List, Optional, TextIO +from typing import List, Optional, TextIO -from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, SuiteConfig +from ..config.benchmark_config import BenchmarkConfig, SuiteConfig from .statistics import BenchmarkStats, CombinedBenchmarkStats from .suite_results import ( CorrectnessResult, @@ -127,6 +127,7 @@ def _print_stats_block(self, stats: BenchmarkStats) -> None: stats: Benchmark statistics. """ self._print(f" Mean: {stats.mean_ms:.3f} ms") + self._print(f" Median: {stats.median_ms:.3f} ms") self._print(f" Std Dev: {stats.std_ms:.3f} ms") self._print(f" Min: {stats.min_ms:.3f} ms") self._print(f" Max: {stats.max_ms:.3f} ms") @@ -182,242 +183,6 @@ def _print_line(self, char: str) -> None: """ print(char * self.WIDTH, file=self._output) - # A/B Testing Methods - - def print_ab_header( - self, config: BenchmarkConfig, ab_config: ABTestConfig, graph_name: str - ) -> None: - """Print A/B test configuration header. - - Args: - config: Benchmark configuration. - ab_config: A/B test configuration. - graph_name: Name of the graph being benchmarked. - """ - self._print_line("=") - self._print(f"hipDNN A/B Test: {graph_name}") - self._print_line("=") - self._print(f"Graph: {config.graph_path}") - self._print(f"Warmup: {config.warmup_iters} iterations") - self._print(f"Benchmark: {config.benchmark_iters} iterations") - self._print_line("-") - self._print("Configuration A:") - if ab_config.a_path: - self._print(f" Plugin Path: {ab_config.a_path}") - else: - self._print(" Plugin Path: (default)") - self._print(f" Engine ID: {ab_config.a_id}") - self._print("Configuration B:") - if ab_config.b_path: - self._print(f" Plugin Path: {ab_config.b_path}") - else: - self._print(" Plugin Path: (default)") - self._print(f" Engine ID: {ab_config.b_id}") - self._print_line("-") - self._print("") - - def print_ab_stats( - self, - stats_a: BenchmarkStats, - stats_b: BenchmarkStats, - init_time_a_ms: float, - init_time_b_ms: float, - ) -> None: - """Print side-by-side comparison of A vs B statistics. - - Args: - stats_a: Statistics for configuration A. - stats_b: Statistics for configuration B. - init_time_a_ms: Init time for A in milliseconds. - init_time_b_ms: Init time for B in milliseconds. - """ - # Header - self._print(f"{'':20} {'A':>15} {'B':>15}") - self._print_line("-") - - # Init times - self._print( - f"{'Init Time:':20} {init_time_a_ms:>12.2f} ms {init_time_b_ms:>12.2f} ms" - ) - - # Execution stats - self._print( - f"{'Mean:':20} {stats_a.mean_ms:>12.3f} ms {stats_b.mean_ms:>12.3f} ms" - ) - self._print( - f"{'Std Dev:':20} {stats_a.std_ms:>12.3f} ms {stats_b.std_ms:>12.3f} ms" - ) - self._print( - f"{'Min:':20} {stats_a.min_ms:>12.3f} ms {stats_b.min_ms:>12.3f} ms" - ) - self._print( - f"{'Max:':20} {stats_a.max_ms:>12.3f} ms {stats_b.max_ms:>12.3f} ms" - ) - self._print( - f"{'P95:':20} {stats_a.p95_ms:>12.3f} ms {stats_b.p95_ms:>12.3f} ms" - ) - self._print( - f"{'P99:':20} {stats_a.p99_ms:>12.3f} ms {stats_b.p99_ms:>12.3f} ms" - ) - self._print_line("-") - - # Calculate speedup - if stats_a.mean_ms > 0 and stats_b.mean_ms > 0: - if stats_a.mean_ms > stats_b.mean_ms: - speedup = (stats_a.mean_ms - stats_b.mean_ms) / stats_a.mean_ms * 100 - self._print(f"Speedup: B is {speedup:.1f}% faster") - elif stats_b.mean_ms > stats_a.mean_ms: - speedup = (stats_b.mean_ms - stats_a.mean_ms) / stats_b.mean_ms * 100 - self._print(f"Speedup: A is {speedup:.1f}% faster") - else: - self._print("Speedup: A and B are equal") - - self._print("") - - def print_ab_combined_stats( - self, - stats_a: CombinedBenchmarkStats, - stats_b: CombinedBenchmarkStats, - init_time_a_ms: float, - init_time_b_ms: float, - ) -> None: - """Print side-by-side comparison of A vs B with both E2E and kernel stats. - - Args: - stats_a: Combined statistics for configuration A. - stats_b: Combined statistics for configuration B. - init_time_a_ms: Init time for A in milliseconds. - init_time_b_ms: Init time for B in milliseconds. - """ - # E2E Stats section - self._print("E2E Execution Statistics:") - self._print(f"{'':20} {'A':>15} {'B':>15}") - self._print_line("-") - - # Init times - self._print( - f"{'Init Time:':20} {init_time_a_ms:>12.2f} ms {init_time_b_ms:>12.2f} ms" - ) - - # E2E execution stats - self._print_ab_stats_block(stats_a.e2e_stats, stats_b.e2e_stats) - self._print("") - - # Kernel Stats section (if available) - if stats_a.kernel_stats and stats_b.kernel_stats: - self._print("Kernel Execution Statistics:") - self._print(f"{'':20} {'A':>15} {'B':>15}") - self._print_line("-") - self._print_ab_stats_block(stats_a.kernel_stats, stats_b.kernel_stats) - self._print("") - - # Calculate kernel speedup - ka, kb = stats_a.kernel_stats, stats_b.kernel_stats - if ka.mean_ms > 0 and kb.mean_ms > 0: - if ka.mean_ms > kb.mean_ms: - speedup = (ka.mean_ms - kb.mean_ms) / ka.mean_ms * 100 - self._print(f"Kernel Speedup: B is {speedup:.1f}% faster") - elif kb.mean_ms > ka.mean_ms: - speedup = (kb.mean_ms - ka.mean_ms) / kb.mean_ms * 100 - self._print(f"Kernel Speedup: A is {speedup:.1f}% faster") - else: - self._print("Kernel Speedup: A and B are equal") - self._print("") - else: - self._print("Kernel Timing: Not available") - self._print("") - - def _print_ab_stats_block( - self, stats_a: BenchmarkStats, stats_b: BenchmarkStats - ) -> None: - """Print a side-by-side statistics block for A/B comparison. - - Args: - stats_a: Statistics for configuration A. - stats_b: Statistics for configuration B. - """ - self._print( - f"{'Mean:':20} {stats_a.mean_ms:>12.3f} ms {stats_b.mean_ms:>12.3f} ms" - ) - self._print( - f"{'Std Dev:':20} {stats_a.std_ms:>12.3f} ms {stats_b.std_ms:>12.3f} ms" - ) - self._print( - f"{'Min:':20} {stats_a.min_ms:>12.3f} ms {stats_b.min_ms:>12.3f} ms" - ) - self._print( - f"{'Max:':20} {stats_a.max_ms:>12.3f} ms {stats_b.max_ms:>12.3f} ms" - ) - self._print( - f"{'P95:':20} {stats_a.p95_ms:>12.3f} ms {stats_b.p95_ms:>12.3f} ms" - ) - self._print( - f"{'P99:':20} {stats_a.p99_ms:>12.3f} ms {stats_b.p99_ms:>12.3f} ms" - ) - - def print_ab_comparison( - self, - passed: bool, - max_abs_diff: float, - max_rel_diff: float, - rtol: float, - atol: float, - ) -> None: - """Print A/B accuracy comparison result. - - Args: - passed: Whether comparison passed. - max_abs_diff: Maximum absolute difference. - max_rel_diff: Maximum relative difference. - rtol: Relative tolerance used. - atol: Absolute tolerance used. - """ - status = "PASSED" if passed else "FAILED" - self._print(f"Accuracy Comparison: {status}") - self._print(f" (rtol={rtol:.0e}, atol={atol:.0e})") - if not passed: - self._print(f" Max abs diff: {max_abs_diff:.2e}") - self._print(f" Max rel diff: {max_rel_diff:.2e}") - - def print_ab_validation( - self, - validation_a: Optional[Any], - validation_b: Optional[Any], - rtol: float, - atol: float, - ) -> None: - """Print reference validation results for A/B test. - - Args: - validation_a: ValidationResult for configuration A, or None. - validation_b: ValidationResult for configuration B, or None. - rtol: Relative tolerance used. - atol: Absolute tolerance used. - """ - if validation_a is None and validation_b is None: - return - - self._print("") - self._print("Reference Validation:") - - if validation_a is not None: - status_a = "PASSED" if validation_a.passed else "FAILED" - self._print(f" Config A vs {validation_a.provider_name}: {status_a}") - if not validation_a.passed: - self._print(f" Max abs diff: {validation_a.max_abs_diff:.2e}") - self._print(f" Max rel diff: {validation_a.max_rel_diff:.2e}") - - if validation_b is not None: - status_b = "PASSED" if validation_b.passed else "FAILED" - self._print(f" Config B vs {validation_b.provider_name}: {status_b}") - if not validation_b.passed: - self._print(f" Max abs diff: {validation_b.max_abs_diff:.2e}") - self._print(f" Max rel diff: {validation_b.max_rel_diff:.2e}") - - self._print(f" (rtol={rtol:.0e}, atol={atol:.0e})") - - # Reference Validation Methods - # Suite Methods def print_hipdnn_init_start(self) -> None: @@ -602,6 +367,66 @@ def _pe_outcome(pe: ProviderEngineResult) -> str: return "skipped" return "errored" + def print_graph_result_table(self, graph_result: GraphResult) -> None: + """Render one compact summary row per engine for a graph.""" + if not graph_result.results: + return + + include_plugin = any(pe.plugin_path for pe in graph_result.results) + headers = ["engine", "status"] + if include_plugin: + headers.append("plugin_path") + headers.extend( + [ + "kernel_mean_ms", + "kernel_median_ms", + "e2e_mean_ms", + "e2e_median_ms", + ] + ) + rows: List[List[str]] = [] + for pe in graph_result.results: + row = [pe.provider, self._pe_status(pe)] + if include_plugin: + row.append(pe.plugin_path or "") + row.extend( + [ + self._fmt_stat(pe.gpu_kernel_stats, "mean_ms"), + self._fmt_stat(pe.gpu_kernel_stats, "median_ms"), + self._fmt_stat(pe.e2e_stats, "mean_ms"), + self._fmt_stat(pe.e2e_stats, "median_ms"), + ] + ) + rows.append(row) + + widths = [ + max(len(headers[i]), *(len(row[i]) for row in rows)) + for i in range(len(headers)) + ] + self._print("Results:") + self._print(" " + " ".join(h.ljust(widths[i]) for i, h in enumerate(headers))) + self._print(" " + " ".join("-" * width for width in widths)) + for row in rows: + self._print( + " " + " ".join(row[i].ljust(widths[i]) for i in range(len(row))) + ) + self._print("") + + @staticmethod + def _pe_status(pe: ProviderEngineResult) -> str: + if pe.status != "success": + return pe.status + if pe.correctness is not None and pe.correctness.tolerance_match is False: + return "failed" + return "passed" + + @staticmethod + def _fmt_stat(stats: Optional[BenchmarkStats], name: str) -> str: + if stats is None: + return "n/a" + value = getattr(stats, name) + return f"{value:.3f}" + def print_verbose_graph_result( self, graph_result: GraphResult, suite_config: SuiteConfig ) -> None: diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py index ab365ddb8034..5ee5f7284939 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py @@ -29,6 +29,7 @@ class BenchmarkStats: Attributes: mean_ms: Mean execution time in milliseconds. + median_ms: Median execution time in milliseconds. std_ms: Standard deviation of execution time in milliseconds. min_ms: Minimum execution time in milliseconds. max_ms: Maximum execution time in milliseconds. @@ -44,6 +45,7 @@ class BenchmarkStats: p95_ms: float p99_ms: float total_ms: float = 0.0 + median_ms: float = 0.0 @classmethod def from_timings(cls, timings: List[float]) -> "BenchmarkStats": @@ -65,6 +67,7 @@ def from_timings(cls, timings: List[float]) -> "BenchmarkStats": return cls( mean_ms=float(np.mean(arr)), + median_ms=float(np.median(arr)), std_ms=float(np.std(arr, ddof=1)) if len(arr) > 1 else 0.0, min_ms=float(np.min(arr)), max_ms=float(np.max(arr)), @@ -77,6 +80,7 @@ def to_dict(self) -> Dict[str, float]: """Convert to dictionary for JSON serialization.""" return { "mean_ms": self.mean_ms, + "median_ms": self.median_ms, "std_ms": self.std_ms, "min_ms": self.min_ms, "max_ms": self.max_ms, diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py index 89858fda5a34..7484e29a23ad 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py @@ -151,6 +151,7 @@ class ProviderEngineResult: provider: str engine_id: int status: Literal["success", "error", "skipped"] + plugin_path: Optional[str] = None cpu_build_time_ms: Optional[float] = None gpu_kernel_stats: Optional[BenchmarkStats] = None e2e_stats: Optional[BenchmarkStats] = None @@ -195,6 +196,8 @@ def to_dict(self) -> Dict[str, Any]: "engine_id": self.engine_id, "status": self.status, } + if self.plugin_path is not None: + d["plugin_path"] = self.plugin_path # extra_metrics is exclusively populated by the opt-in # profiling orchestrator, which the suite runner only fires on # the success path. Asserting the invariant here makes it @@ -207,7 +210,7 @@ def to_dict(self) -> Dict[str, Any]: f"extra_metrics is set on status={self.status!r}; " "the orchestrator only runs on success today, so this " "indicates either a new caller or a regression in the " - "success-gating in suite_runner._run_single_provider_engine" + "success-gating in suite_runner.run_single_provider_engine" ) if self.status == "success": d["cpu_build_time_ms"] = self.cpu_build_time_ms diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py index ea728edb7298..64b9c6123c5c 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py @@ -3,7 +3,7 @@ """Unified comparison logic for array validation. -Extracts comparison logic used by both A/B testing and reference validation. +Shared by reference validation and any direct array comparisons. """ from dataclasses import dataclass @@ -33,7 +33,7 @@ class ArrayComparator: """Compares numpy arrays with tolerance-based matching. Handles NaN/Inf detection, shape validation, and difference calculation. - Used by both A/B testing and reference validation. + Used by reference validation and direct output comparisons. """ def __init__(self, rtol: float = 1e-5, atol: float = 1e-8) -> None: diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py index 4b473f76180a..840ee343320d 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py +++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py @@ -60,7 +60,7 @@ def compute_reference( """Compute reference using CPU plugin. This would use the same execution path as GPU but with CPU engine. - Similar to how ABRunner runs two configurations. + Similar to running the same graph through two engine selections. Args: graph_json: The graph as a parsed JSON dictionary. diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py index fa2a41697c46..7d3377de876b 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py @@ -4,7 +4,7 @@ """Tests for the hidden --internal-profiling-run sub-mode. The sub-mode must short-circuit gpu_check, skip Reporter output, and -delegate to suite_runner._run_single_provider_engine for the named +delegate to suite_runner.run_single_provider_engine for the named (graph, engine). These tests focus on the wiring (parser flags, quiet reporter, error paths) rather than running an actual workload. """ @@ -65,7 +65,7 @@ def test_missing_graph_or_engine_returns_error(self, capsys): class TestRunInternalProfilingSuccessPath: """Positive-path coverage. Mocks hipdnn_frontend + GraphLoader + - _run_single_provider_engine so the test stays hermetic on a CI box + run_single_provider_engine so the test stays hermetic on a CI box with no ROCm or GPU. Verifies the wiring the profiler relies on: MetricsConfig(tier='off'), plugin_path forwarding, single-engine filter, and that a success result returns rc=0.""" @@ -85,7 +85,7 @@ def _success_args(self, tmp_path, plugin_path=None): def _patch_stack(self, monkeypatch, captured): """Wire up the three external dependencies as MagicMocks. - Records the SuiteConfig that _run_single_provider_engine is + Records the SuiteConfig that run_single_provider_engine is called with so the test can assert on tier / engine_filter / plugin_path forwarding. """ @@ -109,7 +109,7 @@ def fake_run(**kwargs): result.status = "success" return result - monkeypatch.setattr(internal_profiling, "_run_single_provider_engine", fake_run) + monkeypatch.setattr(internal_profiling, "run_single_provider_engine", fake_run) def test_success_builds_tier_off_suite_config_and_returns_zero( self, tmp_path, monkeypatch @@ -139,7 +139,7 @@ def test_success_forwards_plugin_path(self, tmp_path, monkeypatch): self._patch_stack(monkeypatch, captured) plugin = tmp_path / "plugin.so" rc = internal_profiling.run_internal_profiling( - self._success_args(tmp_path, plugin_path=plugin) + self._success_args(tmp_path, plugin_path=[plugin]) ) assert rc == 0 # Two forwarding paths must both fire: set_engine_plugin_paths @@ -151,6 +151,20 @@ def test_success_forwards_plugin_path(self, tmp_path, monkeypatch): cfg: SuiteConfig = captured["run_kwargs"]["config"] assert cfg.plugin_path == plugin + def test_multiple_plugin_paths_return_error(self, tmp_path, monkeypatch, capsys): + captured: dict = {} + self._patch_stack(monkeypatch, captured) + + rc = internal_profiling.run_internal_profiling( + self._success_args( + tmp_path, + plugin_path=[tmp_path / "plugin-a", tmp_path / "plugin-b"], + ) + ) + + assert rc == 1 + assert "expected exactly one --plugin-path" in capsys.readouterr().err + def test_non_success_status_returns_one(self, tmp_path, monkeypatch, capsys): from unittest.mock import MagicMock @@ -165,7 +179,7 @@ def test_non_success_status_returns_one(self, tmp_path, monkeypatch, capsys): # in place — we only need to swap the runner. monkeypatch.setattr( internal_profiling, - "_run_single_provider_engine", + "run_single_provider_engine", lambda **kw: bad_result, ) @@ -182,7 +196,7 @@ def test_execution_exception_returns_one(self, tmp_path, monkeypatch, capsys): def raising(**kw): raise RuntimeError("kernel exploded") - monkeypatch.setattr(internal_profiling, "_run_single_provider_engine", raising) + monkeypatch.setattr(internal_profiling, "run_single_provider_engine", raising) rc = internal_profiling.run_internal_profiling(self._success_args(tmp_path)) assert rc == 1 diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py index c4b1c5c1bca8..39ee83ceecc6 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py @@ -80,14 +80,21 @@ def test_engine_flag_default_none(self) -> None: args = parser.parse_args(["--graph", "g.json"]) assert args.engine is None - def test_engine_flag_deduplicates_preserving_order(self) -> None: - """--engine 1,1,1 -> [1]; '3,1,3,2' -> [3, 1, 2] (first-seen order).""" + def test_engine_flag_preserves_duplicates(self) -> None: + """--engine entries are ordered execution selections, not a set.""" parser = create_parser() args = parser.parse_args(["--graph", "g.json", "--engine", "1,1,1"]) - assert args.engine == [1] + assert args.engine == [1, 1, 1] args = parser.parse_args(["--graph", "g.json", "--engine", "3,1,3,2"]) - assert args.engine == [3, 1, 2] + assert args.engine == [3, 1, 3, 2] + + def test_plugin_path_accepts_comma_separated_list(self) -> None: + parser = create_parser() + args = parser.parse_args( + ["--graph", "g.json", "--plugin-path", "/plugins/a,/plugins/b"] + ) + assert args.plugin_path == [Path("/plugins/a"), Path("/plugins/b")] def test_verbose_flag_default_false(self) -> None: """No -v / --verbose => args.verbose is False.""" @@ -214,6 +221,92 @@ def test_engine_list_propagates_to_suite_config( suite_config = mock_benchmark.call_args.kwargs["config"] assert suite_config.engine_filter == [1, 2] + @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) + @patch("dnn_benchmarking.cli.suite_runner_cli.run_suite_benchmark") + def test_plugin_paths_propagate_to_suite_config( + self, mock_benchmark: MagicMock, mock_gpu: MagicMock + ) -> None: + mock_benchmark.return_value = 0 + + with tempfile.TemporaryDirectory() as tmpdir: + paths = self._create_graph_files(Path(tmpdir), 1) + + from dnn_benchmarking.cli.main import main + + with patch( + "sys.argv", + [ + "dnn-benchmark", + "--graph", + paths[0], + "--engine", + "2,1", + "--plugin-path", + "/plugins/b,/plugins/a", + ], + ): + main() + + suite_config = mock_benchmark.call_args.kwargs["config"] + assert suite_config.engine_filter == [2, 1] + assert suite_config.plugin_paths == [Path("/plugins/b"), Path("/plugins/a")] + + @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) + @patch("dnn_benchmarking.cli.suite_runner_cli.run_suite_benchmark") + def test_same_engine_plugin_paths_propagate_as_ordered_selections( + self, mock_benchmark: MagicMock, mock_gpu: MagicMock + ) -> None: + mock_benchmark.return_value = 0 + + with tempfile.TemporaryDirectory() as tmpdir: + paths = self._create_graph_files(Path(tmpdir), 1) + + from dnn_benchmarking.cli.main import main + + with patch( + "sys.argv", + [ + "dnn-benchmark", + "--graph", + paths[0], + "--engine", + "1,1", + "--plugin-path", + "/plugins/a,/plugins/b", + ], + ): + main() + + suite_config = mock_benchmark.call_args.kwargs["config"] + selections = suite_config.engine_selections_for(suite_config.engine_filter) + assert suite_config.engine_filter == [1, 1] + assert [s.plugin_path for s in selections] == [ + Path("/plugins/a"), + Path("/plugins/b"), + ] + + def test_plugin_path_count_mismatch_rejected_at_cli_layer(self) -> None: + from dnn_benchmarking.cli.suite_runner_cli import run_suite_cli + + parser = create_parser() + args = parser.parse_args( + [ + "--graph", + "g.json", + "--engine", + "1,2,3", + "--plugin-path", + "/plugins/a,/plugins/b", + ] + ) + reporter = MagicMock(spec=Reporter) + + rc = run_suite_cli(args, graph_paths=[Path("g.json")], reporter=reporter) + + assert rc == 1 + reporter.print_error.assert_called_once() + assert "entry count" in reporter.print_error.call_args[0][0] + @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) @patch("dnn_benchmarking.cli.main.run_pytorch_cli") @patch("dnn_benchmarking.cli.main.run_suite_cli") @@ -414,7 +507,6 @@ def test_all_pass_returns_zero_exit_code( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -445,7 +537,6 @@ def test_one_failure_still_processes_second( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -491,7 +582,6 @@ def test_correctness_failure_returns_two( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -520,7 +610,6 @@ def test_json_output_written_when_output_specified( graph_paths=paths, config=config, output_path=output_file, - plugin_path=None, reporter=Reporter(), ) @@ -554,7 +643,6 @@ def test_no_json_output_when_output_not_specified( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -584,7 +672,6 @@ def test_warmup_iters_passed_per_graph( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -623,7 +710,6 @@ def test_empty_nodes_graph_records_error_and_continues( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -656,7 +742,6 @@ def test_graph_load_error_continues_to_next( graph_paths=paths, config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -664,63 +749,14 @@ def test_graph_load_error_continues_to_next( assert result == 1 -class TestEngineFlagModeRejection: - """--engine list is incompatible with A/B and PyTorch single-engine modes.""" +class TestBackendEngineRouting: + """Tests for engine selection rules across execution backends.""" def _create_graph(self, tmpdir: Path) -> Path: p = tmpdir / "g.json" p.write_text(json.dumps({"name": "g", "nodes": [], "tensors": []})) return p - @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) - def test_engine_list_with_ab_mode_rejected(self, mock_gpu: MagicMock) -> None: - from dnn_benchmarking.cli.main import main - - with tempfile.TemporaryDirectory() as tmpdir: - graph = self._create_graph(Path(tmpdir)) - with patch( - "sys.argv", - [ - "dnn-benchmark", - "--graph", - str(graph), - "--engine", - "1,2", - "--AId", - "1", - "--BId", - "2", - ], - ): - result = main() - assert result == 1 - - @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) - def test_single_engine_with_ab_mode_also_rejected( - self, mock_gpu: MagicMock - ) -> None: - """Even a single-element --engine list is rejected in A/B (it has --AId/--BId).""" - from dnn_benchmarking.cli.main import main - - with tempfile.TemporaryDirectory() as tmpdir: - graph = self._create_graph(Path(tmpdir)) - with patch( - "sys.argv", - [ - "dnn-benchmark", - "--graph", - str(graph), - "--engine", - "5", - "--AId", - "1", - "--BId", - "2", - ], - ): - result = main() - assert result == 1 - @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True) def test_engine_list_with_pytorch_backend_rejected( self, mock_gpu: MagicMock @@ -803,7 +839,6 @@ def test_unregistered_reference_provider_fails_at_startup( graph_paths=[graph], config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -830,7 +865,6 @@ def test_unavailable_reference_provider_fails_at_startup( graph_paths=[graph], config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) @@ -931,7 +965,6 @@ def test_available_reference_provider_proceeds_to_graph_iteration( graph_paths=[graph], config=config, output_path=None, - plugin_path=None, reporter=Reporter(), ) diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py index d695172b9c53..f9345b23567c 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py @@ -7,7 +7,7 @@ import pytest -from dnn_benchmarking.config import ABTestConfig, BenchmarkConfig, ValidationConfig +from dnn_benchmarking.config import BenchmarkConfig, SuiteConfig, ValidationConfig class TestBenchmarkConfig: @@ -72,96 +72,61 @@ def test_negative_engine_id_accepted(self) -> None: assert config.engine_id == -4567890123456789012 -class TestABTestConfig: - """Tests for ABTestConfig dataclass.""" +class TestSuiteConfigPluginPaths: + """Tests for SuiteConfig engine/plugin path selection.""" - def test_default_values(self) -> None: - """Test that defaults are applied correctly.""" - config = ABTestConfig() - - assert config.a_path is None - assert config.a_id == 1 - assert config.b_path is None - assert config.b_id == 1 - assert config.rtol == 1e-5 - assert config.atol == 1e-8 - - def test_custom_values(self) -> None: - """Test that custom values are stored correctly.""" - config = ABTestConfig( - a_path=Path("/path/to/pluginA"), - a_id=1, - b_path=Path("/path/to/pluginB"), - b_id=2, - rtol=1e-3, - atol=1e-6, + def test_single_plugin_path_applies_to_all_engines(self) -> None: + config = SuiteConfig( + engine_filter=[1, 2], + plugin_paths=[Path("/plugins/a")], ) - - assert config.a_path == Path("/path/to/pluginA") - assert config.a_id == 1 - assert config.b_path == Path("/path/to/pluginB") - assert config.b_id == 2 - assert config.rtol == 1e-3 - assert config.atol == 1e-6 - - def test_string_path_converted_to_path(self) -> None: - """Test that string paths are converted to Path objects.""" - config = ABTestConfig( - a_path="/path/to/pluginA", # type: ignore - b_path="/path/to/pluginB", # type: ignore + selections = config.engine_selections_for([1, 2]) + + assert [s.engine_id for s in selections] == [1, 2] + assert [s.plugin_path for s in selections] == [ + Path("/plugins/a"), + Path("/plugins/a"), + ] + assert config.plugin_path == Path("/plugins/a") + + def test_multiple_plugin_paths_follow_engine_order(self) -> None: + config = SuiteConfig( + engine_filter=[2, 1], + plugin_paths=[Path("/plugins/b"), Path("/plugins/a")], + ) + selections = config.engine_selections_for([2, 1]) + + assert [s.engine_id for s in selections] == [2, 1] + assert [s.plugin_path for s in selections] == [ + Path("/plugins/b"), + Path("/plugins/a"), + ] + assert config.plugin_path is None + + def test_repeated_engine_ids_keep_distinct_plugin_paths(self) -> None: + config = SuiteConfig( + engine_filter=[1, 1], + plugin_paths=[Path("/plugins/a"), Path("/plugins/b")], ) - assert isinstance(config.a_path, Path) - assert isinstance(config.b_path, Path) - assert config.a_path == Path("/path/to/pluginA") - assert config.b_path == Path("/path/to/pluginB") + selections = config.engine_selections_for([1, 1]) - def test_negative_ids_accepted(self) -> None: - """a_id / b_id may be negative (FNV-1a engine ID hashes).""" - config = ABTestConfig(a_id=-1, b_id=-2) - assert config.a_id == -1 - assert config.b_id == -2 + assert [s.engine_id for s in selections] == [1, 1] + assert [s.plugin_path for s in selections] == [ + Path("/plugins/a"), + Path("/plugins/b"), + ] - def test_negative_rtol_raises(self) -> None: - """Test that negative rtol raises ValueError.""" - with pytest.raises(ValueError, match="rtol must be non-negative"): - ABTestConfig(rtol=-1e-5) + def test_multiple_plugin_paths_require_engine_filter(self) -> None: + with pytest.raises(ValueError, match="requires --engine"): + SuiteConfig(plugin_paths=[Path("/plugins/a"), Path("/plugins/b")]) - def test_negative_atol_raises(self) -> None: - """Test that negative atol raises ValueError.""" - with pytest.raises(ValueError, match="atol must be non-negative"): - ABTestConfig(atol=-1e-8) - - def test_validate_paths_with_existing_paths(self, tmp_path: Path) -> None: - """Test validate_paths succeeds with existing paths.""" - plugin_a = tmp_path / "pluginA" - plugin_b = tmp_path / "pluginB" - plugin_a.mkdir() - plugin_b.mkdir() - - config = ABTestConfig(a_path=plugin_a, b_path=plugin_b) - # Should not raise - config.validate_paths() - - def test_validate_paths_with_none_paths(self) -> None: - """Test validate_paths succeeds with None paths.""" - config = ABTestConfig() - # Should not raise - config.validate_paths() - - def test_validate_paths_nonexistent_a_path(self, tmp_path: Path) -> None: - """Test validate_paths raises for nonexistent a_path.""" - config = ABTestConfig(a_path=tmp_path / "nonexistent") - - with pytest.raises(ValueError, match="Plugin path A does not exist"): - config.validate_paths() - - def test_validate_paths_nonexistent_b_path(self, tmp_path: Path) -> None: - """Test validate_paths raises for nonexistent b_path.""" - config = ABTestConfig(b_path=tmp_path / "nonexistent") - - with pytest.raises(ValueError, match="Plugin path B does not exist"): - config.validate_paths() + def test_plugin_path_count_must_match_engine_count(self) -> None: + with pytest.raises(ValueError, match="entry count"): + SuiteConfig( + engine_filter=[1, 2, 3], + plugin_paths=[Path("/plugins/a"), Path("/plugins/b")], + ) class TestValidationConfig: diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py index 940f6dfee34a..9503b9d130f9 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py @@ -4,7 +4,7 @@ """Unit tests for suite_runner module.""" from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import pytest @@ -333,10 +333,22 @@ def test_no_engines_unsupported_error_recorded_as_skipped(self, mock_exec_cls): assert r.status == "skipped" assert "No engine configurations" in (r.skip_reason or "") + @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name") + @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider") @patch("dnn_benchmarking.execution.suite_runner.Executor") - def test_engine_filter_excludes_everything(self, mock_exec_cls): - """When engine_filter excludes every discovered engine, surface as error.""" + @patch("dnn_benchmarking.execution.suite_runner.BufferManager") + def test_engine_filter_runs_explicit_id_without_discovery( + self, + mock_bm_cls, + mock_exec_cls, + mock_get_ref, + mock_resolve_name, + ): + """Explicit --engine IDs run in CLI order without discovery filtering.""" + mock_resolve_name.side_effect = lambda eid: f"engine_{eid}" + mock_get_ref.return_value = None mock_exec_cls.side_effect = _make_exec_factory(engine_ids=[0, 1]) + mock_bm_cls.return_value = _make_bm_mock() result = run_graph_all_providers( graph_path=Path("test.json"), @@ -347,8 +359,8 @@ def test_engine_filter_excludes_everything(self, mock_exec_cls): ) assert len(result.results) == 1 - assert result.results[0].status == "error" - assert "filter" in result.results[0].error_message.lower() + assert result.results[0].status == "success" + assert result.results[0].engine_id == 99 class TestSuiteConfigValidation: @@ -454,7 +466,7 @@ def test_engine_filter_list_keeps_intersection( mock_get_ref, mock_resolve_name, ): - """engine_filter=[1, 3, 99]: engines 1 and 3 run; 99 (not discovered) is dropped.""" + """engine_filter=[1, 3, 99] runs exactly those IDs in caller order.""" mock_resolve_name.side_effect = lambda eid: f"engine_{eid}" mock_get_ref.return_value = None @@ -469,8 +481,91 @@ def test_engine_filter_list_keeps_intersection( handle=MagicMock(), ) - engine_ids = sorted(r.engine_id for r in result.results) - assert engine_ids == [1, 3] + engine_ids = [r.engine_id for r in result.results] + assert engine_ids == [1, 3, 99] + + @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name") + @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider") + @patch("dnn_benchmarking.execution.suite_runner.Executor") + @patch("dnn_benchmarking.execution.suite_runner.BufferManager") + def test_same_engine_runs_with_distinct_plugin_paths( + self, + mock_bm_cls, + mock_exec_cls, + mock_get_ref, + mock_resolve_name, + ): + """Repeated engine IDs are separate ordered selections.""" + mock_resolve_name.side_effect = lambda eid: f"engine_{eid}" + mock_get_ref.return_value = None + mock_exec_cls.side_effect = _make_exec_factory(has_kernel_timings=True) + mock_bm_cls.return_value = _make_bm_mock() + hipdnn = MagicMock() + hipdnn.PluginLoadingMode.ABSOLUTE = "absolute" + hipdnn.Handle.side_effect = [MagicMock(), MagicMock()] + + with patch.dict("sys.modules", {"hipdnn_frontend": hipdnn}): + result = run_graph_all_providers( + graph_path=Path("test.json"), + graph_json=_make_graph_json(), + tensor_infos=[_make_tensor_info(1)], + config=_make_config( + engine_filter=[1, 1], + plugin_paths=[Path("/plugins/a"), Path("/plugins/b")], + ), + handle=None, + ) + + assert [r.engine_id for r in result.results] == [1, 1] + assert [r.plugin_path for r in result.results] == [ + "/plugins/a", + "/plugins/b", + ] + hipdnn.set_engine_plugin_paths.assert_has_calls( + [ + call(["/plugins/a"], "absolute"), + call(["/plugins/b"], "absolute"), + ] + ) + + @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name") + @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider") + @patch("dnn_benchmarking.execution.suite_runner.Executor") + @patch("dnn_benchmarking.execution.suite_runner.BufferManager") + def test_per_engine_handle_creation_failure_records_error_result( + self, + mock_bm_cls, + mock_exec_cls, + mock_get_ref, + mock_resolve_name, + ): + """A later per-engine handle failure records an error row and continues.""" + mock_resolve_name.side_effect = lambda eid: f"engine_{eid}" + mock_get_ref.return_value = None + mock_exec_cls.side_effect = _make_exec_factory(has_kernel_timings=True) + mock_bm_cls.return_value = _make_bm_mock() + hipdnn = MagicMock() + hipdnn.PluginLoadingMode.ABSOLUTE = "absolute" + hipdnn.Handle.side_effect = [MagicMock(), RuntimeError("bad plugin")] + + with patch.dict("sys.modules", {"hipdnn_frontend": hipdnn}): + result = run_graph_all_providers( + graph_path=Path("test.json"), + graph_json=_make_graph_json(), + tensor_infos=[_make_tensor_info(1)], + config=_make_config( + engine_filter=[1, 2], + plugin_paths=[Path("/plugins/a"), Path("/plugins/b")], + ), + handle=None, + ) + + assert [r.status for r in result.results] == ["success", "error"] + assert result.results[0].plugin_path == "/plugins/a" + assert result.results[1].plugin_path == "/plugins/b" + assert "bad plugin" in (result.results[1].error_message or "") + assert result.results[1].correctness is not None + assert result.results[1].correctness.execution_success is False class TestNoRetryOnFailure: diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py index ceaa11482c34..f71841d6e6a5 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py @@ -6,8 +6,9 @@ import io from pathlib import Path -from dnn_benchmarking.config import ABTestConfig, BenchmarkConfig +from dnn_benchmarking.config import BenchmarkConfig from dnn_benchmarking.reporting import BenchmarkStats, Reporter +from dnn_benchmarking.reporting.suite_results import GraphResult, ProviderEngineResult class TestReporter: @@ -123,160 +124,89 @@ def test_print_error(self) -> None: assert "ERROR: Something went wrong" in result -class TestReporterAB: - """Tests for Reporter A/B testing methods.""" +class TestReporterEngineTable: + """Tests for the compact per-engine result table.""" - def test_print_ab_header(self) -> None: - """Test A/B header output format.""" + def test_print_graph_result_table_without_comparison_columns(self) -> None: output = io.StringIO() reporter = Reporter(output=output) - - config = BenchmarkConfig( - graph_path=Path("/test/graph.json"), - warmup_iters=10, - benchmark_iters=100, - ) - ab_config = ABTestConfig( - a_path=Path("/path/to/pluginA"), - a_id=1, - b_path=Path("/path/to/pluginB"), - b_id=2, + graph = GraphResult( + graph_name="g", + graph_path="/tmp/g.json", + results=[ + ProviderEngineResult( + provider="engine_1", + engine_id=1, + status="success", + gpu_kernel_stats=BenchmarkStats( + mean_ms=1.0, + median_ms=0.9, + std_ms=0.1, + min_ms=0.8, + max_ms=1.2, + p95_ms=1.1, + p99_ms=1.2, + ), + e2e_stats=BenchmarkStats( + mean_ms=2.0, + median_ms=1.8, + std_ms=0.2, + min_ms=1.6, + max_ms=2.4, + p95_ms=2.2, + p99_ms=2.4, + ), + ) + ], ) - reporter.print_ab_header(config, ab_config, "test_conv_fwd") + reporter.print_graph_result_table(graph) result = output.getvalue() - assert "hipDNN A/B Test: test_conv_fwd" in result - assert "/test/graph.json" in result - assert "Configuration A:" in result - assert "Configuration B:" in result - assert "/path/to/pluginA" in result - assert "/path/to/pluginB" in result - assert "Engine ID: 1" in result - assert "Engine ID: 2" in result - - def test_print_ab_header_default_paths(self) -> None: - """Test A/B header with default plugin paths.""" - output = io.StringIO() - reporter = Reporter(output=output) + assert "kernel_mean_ms" in result + assert "kernel_median_ms" in result + assert "e2e_mean_ms" in result + assert "e2e_median_ms" in result + assert "delta_pct" not in result - config = BenchmarkConfig( - graph_path=Path("/test/graph.json"), - ) - ab_config = ABTestConfig(a_id=1, b_id=2) - - reporter.print_ab_header(config, ab_config, "test_conv_fwd") - - result = output.getvalue() - assert "(default)" in result - - def test_print_ab_stats(self) -> None: - """Test A/B statistics output format.""" + def test_print_graph_result_table_with_plugin_path_column(self) -> None: output = io.StringIO() reporter = Reporter(output=output) - - stats_a = BenchmarkStats( - mean_ms=1.234, - std_ms=0.045, - min_ms=1.156, - max_ms=1.456, - p95_ms=1.312, - p99_ms=1.398, - ) - stats_b = BenchmarkStats( - mean_ms=1.100, - std_ms=0.035, - min_ms=1.050, - max_ms=1.200, - p95_ms=1.180, - p99_ms=1.195, + graph = GraphResult( + graph_name="g", + graph_path="/tmp/g.json", + results=[ + ProviderEngineResult( + provider="engine_2", + engine_id=2, + status="success", + plugin_path="/plugins/b", + gpu_kernel_stats=BenchmarkStats( + mean_ms=1.0, + median_ms=0.9, + std_ms=0.1, + min_ms=0.8, + max_ms=1.2, + p95_ms=1.1, + p99_ms=1.2, + ), + e2e_stats=BenchmarkStats( + mean_ms=2.0, + median_ms=1.8, + std_ms=0.2, + min_ms=1.6, + max_ms=2.4, + p95_ms=2.2, + p99_ms=2.4, + ), + ) + ], ) - reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0) + reporter.print_graph_result_table(graph) result = output.getvalue() - assert "A" in result - assert "B" in result - assert "Init Time:" in result - assert "Mean:" in result - assert "Speedup:" in result - - def test_print_ab_stats_speedup_b_faster(self) -> None: - """Test A/B stats shows B is faster.""" - output = io.StringIO() - reporter = Reporter(output=output) - - stats_a = BenchmarkStats( - mean_ms=2.0, - std_ms=0.1, - min_ms=1.9, - max_ms=2.1, - p95_ms=2.0, - p99_ms=2.1, - ) - stats_b = BenchmarkStats( - mean_ms=1.0, - std_ms=0.1, - min_ms=0.9, - max_ms=1.1, - p95_ms=1.0, - p99_ms=1.1, - ) - - reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0) - - result = output.getvalue() - assert "B is" in result - assert "faster" in result - - def test_print_ab_stats_speedup_a_faster(self) -> None: - """Test A/B stats shows A is faster.""" - output = io.StringIO() - reporter = Reporter(output=output) - - stats_a = BenchmarkStats( - mean_ms=1.0, - std_ms=0.1, - min_ms=0.9, - max_ms=1.1, - p95_ms=1.0, - p99_ms=1.1, - ) - stats_b = BenchmarkStats( - mean_ms=2.0, - std_ms=0.1, - min_ms=1.9, - max_ms=2.1, - p95_ms=2.0, - p99_ms=2.1, - ) - - reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0) - - result = output.getvalue() - assert "A is" in result - assert "faster" in result - - def test_print_ab_comparison_passed(self) -> None: - """Test A/B comparison passed output.""" - output = io.StringIO() - reporter = Reporter(output=output) - - reporter.print_ab_comparison(True, 1e-7, 1e-6, 1e-5, 1e-8) - - result = output.getvalue() - assert "PASSED" in result - assert "rtol=" in result - assert "atol=" in result - - def test_print_ab_comparison_failed(self) -> None: - """Test A/B comparison failed output.""" - output = io.StringIO() - reporter = Reporter(output=output) - - reporter.print_ab_comparison(False, 0.1, 0.05, 1e-5, 1e-8) - - result = output.getvalue() - assert "FAILED" in result - assert "Max abs diff:" in result - assert "Max rel diff:" in result + assert "plugin_path" in result + assert "/plugins/b" in result + assert "delta_pct" not in result + assert "%" not in result diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py index 0ec157ead2e9..d077ab6fe982 100644 --- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py +++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py @@ -34,6 +34,7 @@ def test_to_dict(self): d = stats.to_dict() assert d == { "mean_ms": 1.0, + "median_ms": 0.0, "std_ms": 0.1, "min_ms": 0.5, "max_ms": 1.5, @@ -148,6 +149,31 @@ def test_success_serializes_with_timing_and_correctness(self): assert "correctness" in d assert d["gpu_kernel_stats"]["mean_ms"] == 1.0 + def test_success_serializes_plugin_path(self): + stats = BenchmarkStats( + mean_ms=1.0, + std_ms=0.1, + min_ms=0.5, + max_ms=1.5, + p95_ms=1.4, + p99_ms=1.49, + median_ms=0.9, + ) + pe = ProviderEngineResult( + provider="miopen", + engine_id=1, + status="success", + plugin_path="/plugins/a", + cpu_build_time_ms=10.5, + gpu_kernel_stats=stats, + e2e_stats=stats, + ) + + d = pe.to_dict() + + assert d["plugin_path"] == "/plugins/a" + assert "comparison_to_baseline" not in d + def test_error_serializes_without_timing(self): """ProviderEngineResult with status='error' serializes with status, error_message, no timing data."""