diff --git a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml
index 99d14fa1f410..8b1b826696ff 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml
+++ b/projects/hipdnn/tools/dnn-benchmarking/pyproject.toml
@@ -3,7 +3,7 @@ name = "dnn-benchmarking"
 version = "0.1.0"
 description = "Benchmarking and validation tool for hipDNN graphs"
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.12"
 license = {text = "MIT"}
 authors = [
     {name = "Advanced Micro Devices, Inc."},
@@ -13,9 +13,6 @@ classifiers = [
     "Intended Audience :: Developers",
     "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering",
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py
deleted file mode 100644
index e986ad193822..000000000000
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/ab_runner_cli.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright © Advanced Micro Devices, Inc., or its affiliates.
-# SPDX-License-Identifier:  MIT
-
-"""A/B comparison CLI runner."""
-
-import argparse
-from pathlib import Path
-from typing import Literal, Optional
-
-from ..common.exceptions import ExecutionError, GraphLoadError
-from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig
-from ..execution.ab_runner import ABRunner
-from ..graph.loader import GraphLoader
-from ..reporting.reporter import Reporter
-from ..reporting.statistics import CombinedBenchmarkStats
-
-
-def run_ab_benchmark(
-    config: BenchmarkConfig,
-    ab_config: ABTestConfig,
-    reporter: Reporter,
-    seed: Optional[int] = None,
-    gpu_backend: Literal["torch", "auto", "none"] = "auto",
-    validation_config: Optional[ValidationConfig] = None,
-) -> int:
-    """Run A/B comparison workflow.
-
-    Args:
-        config: Benchmark configuration.
-        ab_config: A/B test configuration.
-        reporter: Reporter instance for console output.
-        seed: Optional random seed for reproducibility.
-        gpu_backend: GPU timer backend to use (torch, auto, none).
-        validation_config: Optional validation configuration for reference checking.
-
-    Returns:
-        Exit code (0 for success, 1 for error, 2 for comparison failure).
-    """
-
-    try:
-        ab_config.validate_paths()
-
-        loader = GraphLoader()
-        graph_json = loader.load_json(config.graph_path)
-        loader.validate(graph_json)
-
-        graph_name = loader.get_graph_name(graph_json)
-
-        reporter.print_ab_header(config, ab_config, graph_name)
-
-        runner = ABRunner(
-            graph_json,
-            config,
-            ab_config,
-            gpu_backend=gpu_backend,
-            validation_config=validation_config,
-        )
-        result = runner.run(seed=seed)
-
-        stats_a = CombinedBenchmarkStats.from_result(result.result_a)
-        stats_b = CombinedBenchmarkStats.from_result(result.result_b)
-
-        reporter.print_ab_combined_stats(
-            stats_a,
-            stats_b,
-            result.init_time_a_ms,
-            result.init_time_b_ms,
-        )
-
-        reporter.print_ab_comparison(
-            result.passed,
-            result.max_abs_diff,
-            result.max_rel_diff,
-            ab_config.rtol,
-            ab_config.atol,
-        )
-
-        if validation_config is not None and validation_config.enabled:
-            reporter.print_ab_validation(
-                result.validation_a,
-                result.validation_b,
-                validation_config.rtol,
-                validation_config.atol,
-            )
-
-        reporter.print_footer()
-
-        validation_passed = True
-        if result.validation_a is not None and not result.validation_a.passed:
-            validation_passed = False
-        if result.validation_b is not None and not result.validation_b.passed:
-            validation_passed = False
-
-        return 0 if (result.passed and validation_passed) else 2
-
-    except GraphLoadError as e:
-        reporter.print_error(f"Graph load error: {e}")
-        return 1
-
-    except ExecutionError as e:
-        reporter.print_error(f"Execution error: {e}")
-        return 1
-
-    except ValueError as e:
-        reporter.print_error(f"Configuration error: {e}")
-        return 1
-
-    except Exception as e:
-        reporter.print_error(f"Unexpected error: {e}")
-        return 1
-
-
-def run_ab_cli(args: argparse.Namespace, graph_path: Path, reporter: Reporter) -> int:
-    """Validate A/B CLI args, build configs, and delegate to run_ab_benchmark."""
-
-    if args.AId is None or args.BId is None:
-        reporter.print_error(
-            "A/B testing requires both --AId and --BId to be specified"
-        )
-        return 1
-
-    if args.engine:
-        reporter.print_error(
-            "--engine is not supported in A/B testing mode "
-            "(use --AId and --BId instead)"
-        )
-        return 1
-
-    try:
-        config = BenchmarkConfig(
-            graph_path=graph_path,
-            warmup_iters=args.warmup,
-            benchmark_iters=args.iters,
-            engine_id=args.AId,
-        )
-    except ValueError as e:
-        reporter.print_error(f"Configuration error: {e}")
-        return 1
-
-    try:
-        ab_config = ABTestConfig(
-            a_path=args.APath,
-            a_id=args.AId,
-            b_path=args.BPath,
-            b_id=args.BId,
-            rtol=args.rtol,
-            atol=args.atol,
-        )
-    except ValueError as e:
-        reporter.print_error(f"A/B configuration error: {e}")
-        return 1
-
-    validation_config = None
-    if args.validate != "none":
-        try:
-            validation_config = ValidationConfig(
-                provider=args.validate,
-                rtol=args.rtol,
-                atol=args.atol,
-            )
-        except ValueError as e:
-            reporter.print_error(f"Validation configuration error: {e}")
-            return 1
-
-    return run_ab_benchmark(
-        config,
-        ab_config,
-        reporter,
-        seed=args.seed,
-        gpu_backend="auto",
-        validation_config=validation_config,
-    )
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py
index 376808e0add6..fe27a69b2cee 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/internal_profiling.py
@@ -24,7 +24,7 @@
 
 from ..common.exceptions import GraphLoadError
 from ..config.benchmark_config import MetricsConfig, SuiteConfig
-from ..execution.suite_runner import _run_single_provider_engine
+from ..execution.suite_runner import run_single_provider_engine, set_plugin_path
 from ..graph.loader import GraphLoader
 
 
@@ -49,9 +49,18 @@ def run_internal_profiling(args: argparse.Namespace) -> int:
         )
         return 1
 
+    plugin_path = None
+    if args.plugin_path:
+        if len(args.plugin_path) != 1:
+            print(
+                "internal-profiling-run: expected exactly one --plugin-path",
+                file=sys.stderr,
+            )
+            return 1
+        plugin_path = args.plugin_path[0]
+
     try:
-        if args.plugin_path is not None:
-            hipdnn.set_engine_plugin_paths([str(args.plugin_path)])
+        set_plugin_path(hipdnn, plugin_path)
         handle = hipdnn.Handle()
     except RuntimeError as e:
         print(
@@ -74,11 +83,9 @@ def run_internal_profiling(args: argparse.Namespace) -> int:
     # the inner pass; the parent already collected basic metrics on the
     # timed pass.
     #
-    # `plugin_path` is forwarded so the child's SuiteConfig matches the
-    # parent's. hipdnn.set_engine_plugin_paths above is what actually
-    # loads the plugin today, but any future code that reads
-    # config.plugin_path from inside _run_single_provider_engine would
-    # otherwise silently see None in the child.
+    # `plugin_path` is forwarded so the child SuiteConfig matches the
+    # parent's selected engine/plugin row. The outer suite runner passes
+    # exactly one plugin path for this single-engine subprocess.
     suite_config = SuiteConfig(
         warmup_iters=args.warmup,
         benchmark_iters=args.iters,
@@ -88,11 +95,11 @@ def run_internal_profiling(args: argparse.Namespace) -> int:
         reference_provider="none",
         verbose=False,
         metrics=MetricsConfig(tier="off"),
-        plugin_path=args.plugin_path,
+        plugin_paths=[plugin_path] if plugin_path is not None else None,
     )
 
     try:
-        result = _run_single_provider_engine(
+        result = run_single_provider_engine(
             graph_path=graph_path,
             graph_json_str=json.dumps(graph_json),
             graph_name=graph_json.get("name", graph_path.stem),
@@ -101,6 +108,7 @@ def run_internal_profiling(args: argparse.Namespace) -> int:
             handle=handle,
             provider="profiling-inner",
             engine_id=engine_id,
+            plugin_path=plugin_path,
             ref_provider=None,
             validation_requested=False,
             graph_json=graph_json,
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py
index 0f0b4f12799c..28f1f63d5cff 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/main.py
@@ -26,7 +26,6 @@
 
 from ..common.exceptions import GraphLoadError
 from ..reporting.reporter import Reporter
-from .ab_runner_cli import run_ab_cli
 from .internal_profiling import run_internal_profiling
 from .parser import create_parser
 from .pytorch_runner_cli import run_pytorch_cli
@@ -82,15 +81,7 @@ def main() -> int:
         return 1
 
     try:
-        if args.AId is not None or args.BId is not None:
-            if len(resolved_files) > 1:
-                reporter.print_error(
-                    "A/B testing requires a single graph file, not a glob pattern"
-                )
-                return 1
-            return run_ab_cli(args, Path(resolved_files[0]), reporter)
-
-        elif args.backend == "pytorch":
+        if args.backend == "pytorch":
             if len(resolved_files) > 1:
                 reporter.print_error(
                     "Suite mode is not supported with --backend pytorch"
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py
index c90e91d64716..4ee19973866b 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/parser.py
@@ -13,14 +13,16 @@ def _parse_engine_list(s: str) -> List[int]:
 
     Engine IDs are deterministic FNV-1a hashes of the engine name and may
     be negative when interpreted as signed int64, so we accept any int.
-    Duplicates are removed while preserving first-seen order.
+    Duplicate IDs are preserved because each comma-delimited entry is an
+    ordered execution selection; this allows comparing the same engine ID
+    from different plugin paths.
 
     Examples:
       "1"                      -> [1]
       "1,2,3"                  -> [1, 2, 3]
       "1, 2"                   -> [1, 2]
-      "1,1,2"                  -> [1, 2]
-      "3,1,3,2"                -> [3, 1, 2]
+      "1,1,2"                  -> [1, 1, 2]
+      "3,1,3,2"                -> [3, 1, 3, 2]
       "-4567890123456789012"   -> [-4567890123456789012]
     """
     parts = [p.strip() for p in s.split(",")]
@@ -31,14 +33,16 @@ def _parse_engine_list(s: str) -> List[int]:
         ids = [int(p) for p in parts]
     except ValueError:
         raise argparse.ArgumentTypeError(f"--engine expects integer ID(s), got {s!r}")
-    # Deduplicate while preserving first-seen order
-    seen: set = set()
-    deduped: List[int] = []
-    for i in ids:
-        if i not in seen:
-            seen.add(i)
-            deduped.append(i)
-    return deduped
+    return ids
+
+
+def _parse_plugin_path_list(s: str) -> List[Path]:
+    """Parse --plugin-path as a comma-separated list of plugin directories."""
+    parts = [p.strip() for p in s.split(",")]  # tolerate spaces around commas
+    parts = [p for p in parts if p]  # drop blank entries, e.g. from "a,,b"
+    if not parts:  # nothing but commas/whitespace was supplied
+        raise argparse.ArgumentTypeError("--plugin-path requires at least one path")
+    return [Path(p) for p in parts]  # order kept; paths are not checked for existence
 
 
 def create_parser() -> argparse.ArgumentParser:
@@ -61,7 +65,7 @@ def create_parser() -> argparse.ArgumentParser:
   dnn-benchmark --graph ./graphs/conv1_fwd.json --warmup 20 --iters 200
   dnn-benchmark -g ./graphs/conv1_fwd.json -e 1
   dnn-benchmark -g ./graphs/conv1_fwd.json -v        # verbose per-engine output
-  dnn-benchmark -g ./graphs/conv1_fwd.json -e 1,2    # compare engines 1 and 2
+  dnn-benchmark -g ./graphs/conv1_fwd.json -e 1,2
 
 PyTorch Backend (GPU via PyTorch):
   dnn-benchmark -g ./graph.json --backend pytorch
@@ -71,9 +75,13 @@ def create_parser() -> argparse.ArgumentParser:
   dnn-benchmark -g ./graph.json --validate pytorch
   dnn-benchmark -g ./graph.json --validate pytorch --rtol 1e-3
 
-A/B Testing:
-  dnn-benchmark -g ./graph.json --AId 1 --BId 2
-  dnn-benchmark -g ./graph.json --APath /path/pluginA --AId 1 --BPath /path/pluginB --BId 2
+Engine Comparison:
+  dnn-benchmark -g ./graph.json --engine 1,2,3
+  dnn-benchmark -g ./graph.json --engine 1,2 --plugin-path /path/pluginA,/path/pluginB
+
+Engine IDs:
+  hipdnn_list_engines --plugin-dir /path/to/hipdnn_plugins/engines
+  (shipped with hipDNN tools, e.g. /opt/rocm/bin/hipdnn_list_engines)
 
 Suite Mode (multiple graphs):
   dnn-benchmark -g graphs/                           # all .json/.tar.gz files in directory
@@ -166,38 +174,8 @@ def create_parser() -> argparse.ArgumentParser:
         "(default: summary table)",
     )
 
-    # A/B Testing arguments
-    ab_group = parser.add_argument_group("A/B Testing")
-    ab_group.add_argument(
-        "--APath",
-        type=Path,
-        default=None,
-        metavar="PATH",
-        help="Plugin path for configuration A (default: use system default)",
-    )
-    ab_group.add_argument(
-        "--AId",
-        type=int,
-        default=None,
-        metavar="ID",
-        help="Engine ID for configuration A",
-    )
-    ab_group.add_argument(
-        "--BPath",
-        type=Path,
-        default=None,
-        metavar="PATH",
-        help="Plugin path for configuration B (default: use system default)",
-    )
-    ab_group.add_argument(
-        "--BId",
-        type=int,
-        default=None,
-        metavar="ID",
-        help="Engine ID for configuration B",
-    )
-    # Comparison tolerances (used by A/B testing, validation, and suite mode)
-    comparison_group = parser.add_argument_group("Comparison")
+    # Reference comparison tolerances
+    comparison_group = parser.add_argument_group("Reference Comparison")
     comparison_group.add_argument(
         "--rtol",
         type=float,
@@ -229,10 +207,14 @@ def create_parser() -> argparse.ArgumentParser:
     suite_group = parser.add_argument_group("Suite Options")
     suite_group.add_argument(
         "--plugin-path",
-        type=Path,
+        type=_parse_plugin_path_list,
         default=None,
-        metavar="DIR",
-        help="Path to directory containing hipDNN engine plugin .so files",
+        metavar="PATHS",
+        help=(
+            "Directory containing hipDNN engine plugin .so files, or a "
+            "comma-separated list matching --engine order. A single path is "
+            "shared by all selected engines."
+        ),
     )
 
     # Metrics options
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py
index 35c14ec24f30..bb3746bb9e76 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/cli/suite_runner_cli.py
@@ -9,7 +9,7 @@
 
 from ..common.exceptions import ExecutionError, GraphLoadError
 from ..config.benchmark_config import MetricsConfig, SuiteConfig
-from ..execution.suite_runner import run_graph_all_providers
+from ..execution.suite_runner import run_graph_all_providers, set_plugin_path
 from ..graph.loader import GraphLoader
 from ..reporting.reporter import Reporter
 from ..reporting.suite_results import (
@@ -61,7 +61,6 @@ def run_suite_benchmark(
     graph_paths: List[Path],
     config: SuiteConfig,
     output_path: Optional[Path],
-    plugin_path: Optional[Path],
     reporter: Reporter,
     tarball_source: Optional[str] = None,
 ) -> int:
@@ -71,7 +70,6 @@ def run_suite_benchmark(
         graph_paths: List of resolved graph file paths to benchmark.
         config: Suite configuration.
         output_path: Optional path to export results as JSON.
-        plugin_path: Optional path to plugin .so directory.
         reporter: Reporter instance for console output.
         tarball_source: Optional tarball source path for display.
 
@@ -105,9 +103,14 @@ def run_suite_benchmark(
     try:
         import hipdnn_frontend as hipdnn
 
-        if plugin_path is not None:
-            hipdnn.set_engine_plugin_paths([str(plugin_path)])
-        handle = hipdnn.Handle()
+        plugin_paths = config.plugin_paths
+        per_engine_plugin_paths = plugin_paths is not None and len(plugin_paths) > 1
+
+        if not per_engine_plugin_paths:
+            set_plugin_path(hipdnn, config.plugin_path)
+            handle = hipdnn.Handle()
+        else:
+            handle = None
     except ImportError:
         reporter.print_hipdnn_init_newline()
         reporter.print_error(
@@ -131,6 +134,8 @@ def run_suite_benchmark(
             reporter.print_no_engines_applicable()
         if config.verbose:
             reporter.print_verbose_graph_result(gr, config)
+        else:
+            reporter.print_graph_result_table(gr)
         graph_results.append(gr)
 
     suite_result = SuiteResult.from_graph_results(graph_results, total_graphs=total)
@@ -179,6 +184,7 @@ def run_suite_cli(
                 "source requested (--pmc, --emit-trace, --perf, "
                 "--roofline); the directory will not be written to"
             )
+        plugin_paths = args.plugin_path
         config = SuiteConfig(
             warmup_iters=args.warmup,
             benchmark_iters=args.iters,
@@ -190,7 +196,7 @@ def run_suite_cli(
             reference_provider=args.validate,
             verbose=args.verbose,
             metrics=metrics_config,
-            plugin_path=args.plugin_path,
+            plugin_paths=plugin_paths,
         )
     except ValueError as e:
         reporter.print_error(f"Suite configuration error: {e}")
@@ -200,7 +206,6 @@ def run_suite_cli(
         graph_paths=graph_paths,
         config=config,
         output_path=args.output,
-        plugin_path=args.plugin_path,
         reporter=reporter,
         tarball_source=tarball_source,
     )
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py
index 1e97d877f697..021519940bd9 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/__init__.py
@@ -3,6 +3,18 @@
 
 """Configuration module for dnn-benchmarking."""
 
-from .benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig
+from .benchmark_config import (
+    BenchmarkConfig,
+    EngineSelection,
+    MetricsConfig,
+    SuiteConfig,
+    ValidationConfig,
+)
 
-__all__ = ["ABTestConfig", "BenchmarkConfig", "ValidationConfig"]
+__all__ = [
+    "BenchmarkConfig",
+    "EngineSelection",
+    "MetricsConfig",
+    "SuiteConfig",
+    "ValidationConfig",
+]
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py
index f98f0d42a873..c430b3b61fd9 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/config/benchmark_config.py
@@ -41,52 +41,6 @@ def __post_init__(self) -> None:
             raise ValueError("benchmark_iters must be positive")
 
 
-@dataclass
-class ABTestConfig:
-    """Configuration for A/B testing mode.
-
-    Attributes:
-        a_path: Plugin path for configuration A (None = default).
-        a_id: Engine ID for configuration A.
-        b_path: Plugin path for configuration B (None = default).
-        b_id: Engine ID for configuration B.
-        rtol: Relative tolerance for np.allclose comparison.
-        atol: Absolute tolerance for np.allclose comparison.
-    """
-
-    a_path: Optional[Path] = None
-    a_id: int = 1
-    b_path: Optional[Path] = None
-    b_id: int = 1
-    rtol: float = 1e-5
-    atol: float = 1e-8
-
-    def __post_init__(self) -> None:
-        """Validate configuration values."""
-        if isinstance(self.a_path, str):
-            self.a_path = Path(self.a_path)
-        if isinstance(self.b_path, str):
-            self.b_path = Path(self.b_path)
-
-        # a_id / b_id are FNV-1a engine ID hashes that may be negative when
-        # interpreted as signed int64; do not bound-check them.
-        if self.rtol < 0:
-            raise ValueError("rtol must be non-negative")
-        if self.atol < 0:
-            raise ValueError("atol must be non-negative")
-
-    def validate_paths(self) -> None:
-        """Validate that plugin paths exist if specified.
-
-        Raises:
-            ValueError: If a specified path does not exist.
-        """
-        if self.a_path is not None and not self.a_path.exists():
-            raise ValueError(f"Plugin path A does not exist: {self.a_path}")
-        if self.b_path is not None and not self.b_path.exists():
-            raise ValueError(f"Plugin path B does not exist: {self.b_path}")
-
-
 @dataclass
 class ValidationConfig:
     """Configuration for reference validation.
@@ -266,6 +220,19 @@ def extra_runs_per_engine(self) -> int:
         )
 
 
+@dataclass(frozen=True)
+class EngineSelection:
+    """One ordered engine execution selection.
+
+    The plugin path is attached to the selection row rather than looked up by
+    engine ID so repeated engine IDs can be benchmarked against different
+    plugin builds.
+    """
+
+    engine_id: int  # FNV-1a hash of the engine name; may be negative as signed int64
+    plugin_path: Optional[Path] = None  # None when no plugin path is configured
+
+
 @dataclass
 class SuiteConfig:
     """Configuration for suite execution mode.
@@ -277,7 +244,7 @@ class SuiteConfig:
         warmup_iters: Number of warmup iterations per provider/engine.
         benchmark_iters: Number of benchmark iterations for timing.
         seed: Optional random seed for reproducible inputs.
-        engine_filter: If set, only iterate engine IDs in this list.
+        engine_filter: If set, ordered engine selections to run.
         rtol: Relative tolerance for correctness comparison.
         atol: Absolute tolerance for correctness comparison.
         gpu_backend: GPU timer backend to use.
@@ -297,10 +264,7 @@ class SuiteConfig:
     reference_provider: str = "none"
     verbose: bool = False
     metrics: MetricsConfig = field(default_factory=MetricsConfig)
-    # Forwarded to the orchestrator's inner subprocess so the child
-    # picks up the same plugin .so directory the parent loaded. Not used
-    # outside of the opt-in profiling path.
-    plugin_path: Optional[Path] = None
+    plugin_paths: Optional[List[Path]] = None
 
     def __post_init__(self) -> None:
         """Validate configuration values."""
@@ -316,6 +280,20 @@ def __post_init__(self) -> None:
             if len(self.engine_filter) == 0:
                 raise ValueError("engine_filter must be non-empty when set")
             # engine IDs are FNV-1a hashes -- may be negative as signed int64.
+        if self.plugin_paths is not None:
+            if len(self.plugin_paths) == 0:
+                raise ValueError("plugin_paths must be non-empty when set")
+            self.plugin_paths = [Path(p) for p in self.plugin_paths]
+
+            if len(self.plugin_paths) > 1:
+                if self.engine_filter is None:
+                    raise ValueError(
+                        "--plugin-path with multiple entries requires --engine"
+                    )
+                if len(self.plugin_paths) != len(self.engine_filter):
+                    raise ValueError(
+                        "--plugin-path entry count must be 1 or match --engine count"
+                    )
         valid_gpu_backends = {"torch", "auto", "none"}
         if self.gpu_backend not in valid_gpu_backends:
             raise ValueError(
@@ -328,3 +306,38 @@ def __post_init__(self) -> None:
                 f"Invalid reference_provider: '{self.reference_provider}'. "
                 f"Valid options: {valid_reference_providers}"
             )
+
+    @property
+    def plugin_path(self) -> Optional[Path]:
+        """Return the shared plugin path when exactly one path is configured."""
+        if self.plugin_paths is None or len(self.plugin_paths) != 1:
+            return None  # unset, or several per-engine paths: no single shared path
+        return self.plugin_paths[0]
+
+    def engine_selections_for(self, engine_ids: List[int]) -> List[EngineSelection]:
+        """Return ordered engine selections for the provided engine IDs.
+
+        ``engine_ids`` is the explicit ``--engine`` list (duplicate IDs are
+        meaningful selections) or the backend-discovered engine list. Multiple
+        plugin paths pair positionally with ``engine_ids``; ValueError is raised
+        if ``engine_filter`` is unset or the counts differ.
+        """
+        if self.plugin_paths is None:  # no plugin path: selections carry None
+            return [EngineSelection(engine_id) for engine_id in engine_ids]
+
+        if len(self.plugin_paths) == 1:  # one path shared by every selection
+            plugin_path = self.plugin_paths[0]
+            return [
+                EngineSelection(engine_id, plugin_path=plugin_path)
+                for engine_id in engine_ids
+            ]
+
+        if self.engine_filter is None or len(engine_ids) != len(self.plugin_paths):
+            raise ValueError(
+                "--plugin-path entry count must be 1 or match --engine count"
+            )
+
+        return [  # lengths verified equal above, so zip drops nothing
+            EngineSelection(engine_id, plugin_path=plugin_path)
+            for engine_id, plugin_path in zip(engine_ids, self.plugin_paths)
+        ]
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py
index 712504d7af47..fb962112eaad 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/__init__.py
@@ -3,7 +3,6 @@
 
 """Execution module for dnn-benchmarking."""
 
-from .ab_runner import ABRunner, ABTestResult
 from .buffer_manager import BufferManager
 from .executor import Executor
 
@@ -23,8 +22,6 @@
 )
 
 __all__ = [
-    "ABRunner",
-    "ABTestResult",
     "BufferManager",
     "Executor",
     "GpuTimer",
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py
deleted file mode 100644
index e23d46323fb8..000000000000
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/ab_runner.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Copyright © Advanced Micro Devices, Inc., or its affiliates.
-# SPDX-License-Identifier:  MIT
-
-"""A/B testing runner for comparing plugin/engine configurations."""
-
-import json
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Literal, Optional, Tuple
-
-import numpy as np
-
-from ..common.exceptions import ExecutionError
-from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, ValidationConfig
-from ..graph.loader import GraphLoader
-from ..graph.tensor_info import TensorInfo
-from ..reporting.statistics import BenchmarkResult
-from ..validation.comparison import ArrayComparator, ComparisonResult
-from .buffer_manager import BufferManager
-from .executor import Executor
-
-
-@dataclass
-class ValidationResult:
-    """Result of reference validation for a single configuration.
-
-    Attributes:
-        passed: Whether validation passed.
-        max_abs_diff: Maximum absolute difference from reference.
-        max_rel_diff: Maximum relative difference from reference.
-        provider_name: Name of the reference provider used.
-    """
-
-    passed: bool
-    max_abs_diff: float
-    max_rel_diff: float
-    provider_name: str
-
-
-@dataclass
-class ABTestResult:
-    """Result of A/B test comparison.
-
-    Attributes:
-        result_a: Full benchmark result for configuration A (includes E2E and kernel timings).
-        result_b: Full benchmark result for configuration B (includes E2E and kernel timings).
-        init_time_a_ms: Graph initialization time for A in milliseconds.
-        init_time_b_ms: Graph initialization time for B in milliseconds.
-        passed: Whether outputs match within tolerance.
-        max_abs_diff: Maximum absolute difference between outputs.
-        max_rel_diff: Maximum relative difference between outputs.
-        validation_a: Optional reference validation result for configuration A.
-        validation_b: Optional reference validation result for configuration B.
-    """
-
-    result_a: BenchmarkResult
-    result_b: BenchmarkResult
-    init_time_a_ms: float
-    init_time_b_ms: float
-    passed: bool
-    max_abs_diff: float
-    max_rel_diff: float
-    validation_a: Optional[ValidationResult] = None
-    validation_b: Optional[ValidationResult] = None
-
-
-class ABRunner:
-    """Runs A/B comparison between two plugin/engine configurations.
-
-    This class handles:
-    - Setting plugin paths for each configuration
-    - Executing the same graph with different engines
-    - Comparing outputs using np.allclose
-    - Collecting timing statistics for both configurations (E2E and kernel)
-    - Optional reference validation for each configuration
-    """
-
-    def __init__(
-        self,
-        graph_json: Dict[str, Any],
-        config: BenchmarkConfig,
-        ab_config: ABTestConfig,
-        gpu_backend: Literal["torch", "auto", "none"] = "auto",
-        validation_config: Optional[ValidationConfig] = None,
-    ) -> None:
-        """Initialize A/B runner.
-
-        Args:
-            graph_json: The graph as a parsed JSON dictionary.
-            config: Benchmark configuration (warmup/iters).
-            ab_config: A/B test configuration (paths, engine IDs, tolerances).
-            gpu_backend: GPU timer backend to use (torch, auto, none).
-            validation_config: Optional validation configuration for reference checking.
-        """
-        self._graph_json = graph_json
-        self._config = config
-        self._ab_config = ab_config
-        self._gpu_backend = gpu_backend
-        self._validation_config = validation_config
-
-    def _set_plugin_path(self, plugin_path: Optional[Path]) -> None:
-        """Set plugin path using hipdnn_frontend API.
-
-        Args:
-            plugin_path: Path to plugin directory, or None for default.
-        """
-        import hipdnn_frontend as hipdnn
-
-        if plugin_path is not None:
-            # Use ABSOLUTE mode to ensure only this plugin is used
-            hipdnn.set_engine_plugin_paths(
-                [str(plugin_path)], hipdnn.PluginLoadingMode.ABSOLUTE
-            )
-
-    def _run_single(
-        self,
-        plugin_path: Optional[Path],
-        engine_id: int,
-        buffer_manager: BufferManager,
-        config_name: str = "",
-    ) -> Tuple[Dict[int, np.ndarray], BenchmarkResult, float]:
-        """Execute graph with specific plugin/engine configuration.
-
-        Args:
-            plugin_path: Path to plugin directory, or None for default.
-            engine_id: Engine ID to use.
-            buffer_manager: Buffer manager with allocated tensors.
-            config_name: Name for this configuration (e.g., "A" or "B").
-
-        Returns:
-            Tuple of (outputs_dict, benchmark_result, init_time_ms) where
-            outputs_dict maps tensor UID to numpy array for all output tensors.
-        """
-        import hipdnn_frontend as hipdnn
-
-        # Set plugin path before creating Handle
-        self._set_plugin_path(plugin_path)
-
-        handle = hipdnn.Handle()
-        executor = Executor(
-            json.dumps(self._graph_json), self._config, gpu_backend=self._gpu_backend
-        )
-        executor.prepare(handle, engine_id=engine_id)
-        init_time_ms = executor.init_time_ms
-
-        variant_pack = buffer_manager.create_variant_pack()
-        executor.warmup(handle, variant_pack)
-        result = executor.benchmark(handle, variant_pack, graph_name=config_name)
-
-        # Get all output data - copy to avoid overwriting
-        output_tensors = buffer_manager.get_output_tensors()
-        if not output_tensors:
-            raise ExecutionError("No output tensors found in graph")
-
-        outputs: Dict[int, np.ndarray] = {}
-        for tensor in output_tensors:
-            data = buffer_manager.get_output_data(tensor.uid)
-            if data is None:
-                raise ExecutionError(
-                    f"Failed to retrieve output data for tensor uid={tensor.uid}"
-                )
-            outputs[tensor.uid] = data.copy()
-
-        return outputs, result, init_time_ms
-
-    def _validate_output(
-        self,
-        outputs: Dict[int, np.ndarray],
-        tensor_infos: List[TensorInfo],
-        buffer_manager: BufferManager,
-        config_name: str,
-    ) -> Optional[ValidationResult]:
-        """Validate output against reference provider.
-
-        Args:
-            outputs: Dict mapping tensor UID to output data from execution.
-            tensor_infos: List of tensor info objects.
-            buffer_manager: Buffer manager with input data.
-            config_name: Name of the configuration being validated.
-
-        Returns:
-            ValidationResult if validation was performed, None otherwise.
-        """
-        if self._validation_config is None or not self._validation_config.enabled:
-            return None
-
-        from ..validation import ReferenceProviderRegistry
-
-        try:
-            provider = ReferenceProviderRegistry.get_provider(
-                self._validation_config.provider
-            )
-
-            if not provider.is_available():
-                return None
-
-            if not provider.supports_graph(self._graph_json):
-                return None
-
-            # Collect input data
-            input_data: Dict[int, np.ndarray] = {}
-            for tensor_info in tensor_infos:
-                if not tensor_info.is_virtual and not tensor_info.is_output:
-                    data = buffer_manager.get_input_data(tensor_info.uid)
-                    if data is not None:
-                        input_data[tensor_info.uid] = data
-
-            # Compute reference
-            reference_outputs = provider.compute_reference(self._graph_json, input_data)
-
-            # Compare all output tensors, track worst-case diffs
-            comparator = ArrayComparator(
-                rtol=self._validation_config.rtol, atol=self._validation_config.atol
-            )
-            all_passed = True
-            worst_abs = 0.0
-            worst_rel = 0.0
-
-            for uid, output_data in outputs.items():
-                ref_output = reference_outputs.get(uid)
-                if ref_output is None:
-                    continue
-                comparison = comparator.compare(
-                    output_data,
-                    ref_output.data,
-                    config_name,
-                    self._validation_config.provider,
-                )
-                if not comparison.passed:
-                    all_passed = False
-                worst_abs = max(worst_abs, comparison.max_abs_diff)
-                worst_rel = max(worst_rel, comparison.max_rel_diff)
-
-            return ValidationResult(
-                passed=all_passed,
-                max_abs_diff=worst_abs,
-                max_rel_diff=worst_rel,
-                provider_name=self._validation_config.provider,
-            )
-
-        except (ValueError, NotImplementedError, ImportError):
-            return None
-
-    def run(self, seed: Optional[int] = 42) -> ABTestResult:
-        """Run A/B comparison.
-
-        Args:
-            seed: Random seed for reproducible input data.
-
-        Returns:
-            ABTestResult with full benchmark results and comparison.
-        """
-        loader = GraphLoader()
-        tensor_infos = loader.extract_tensor_info(self._graph_json)
-
-        validation_a: Optional[ValidationResult] = None
-        validation_b: Optional[ValidationResult] = None
-
-        with BufferManager(tensor_infos) as buffer_manager:
-            buffer_manager.allocate_all()
-            buffer_manager.fill_inputs_random(seed=seed)
-
-            # Run configuration A
-            buffer_manager.zero_outputs()
-            outputs_a, result_a, init_a = self._run_single(
-                self._ab_config.a_path, self._ab_config.a_id, buffer_manager, "A"
-            )
-
-            # Validate A if configured
-            validation_a = self._validate_output(
-                outputs_a, tensor_infos, buffer_manager, "A"
-            )
-
-            # Synchronize GPU to ensure Config A's work is complete before B starts
-            # This prevents stream state contamination in timing measurements
-            try:
-                import torch
-
-                if torch.cuda.is_available():
-                    torch.cuda.synchronize()
-            except ImportError:
-                pass
-
-            # Run configuration B (same inputs)
-            buffer_manager.zero_outputs()
-            outputs_b, result_b, init_b = self._run_single(
-                self._ab_config.b_path, self._ab_config.b_id, buffer_manager, "B"
-            )
-
-            # Validate B if configured
-            validation_b = self._validate_output(
-                outputs_b, tensor_infos, buffer_manager, "B"
-            )
-
-        # Compare all output tensors, track worst-case diffs across all outputs
-        comparator = ArrayComparator(
-            rtol=self._ab_config.rtol, atol=self._ab_config.atol
-        )
-        all_passed = True
-        worst_abs = 0.0
-        worst_rel = 0.0
-
-        for uid in outputs_a:
-            if uid not in outputs_b:
-                all_passed = False
-                worst_abs = float("inf")
-                worst_rel = float("inf")
-                continue
-            comparison = comparator.compare(outputs_a[uid], outputs_b[uid], "A", "B")
-            if not comparison.passed:
-                all_passed = False
-            worst_abs = max(worst_abs, comparison.max_abs_diff)
-            worst_rel = max(worst_rel, comparison.max_rel_diff)
-
-        return ABTestResult(
-            result_a=result_a,
-            result_b=result_b,
-            init_time_a_ms=init_a,
-            init_time_b_ms=init_b,
-            passed=all_passed,
-            max_abs_diff=worst_abs,
-            max_rel_diff=worst_rel,
-            validation_a=validation_a,
-            validation_b=validation_b,
-        )
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py
index 4325852b432a..a246fee5f693 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/execution/suite_runner.py
@@ -3,9 +3,9 @@
 
 """Suite runner for per-graph engine iteration with granular timing.
 
-Iterates the engine IDs discovered for a graph via
-``Graph.get_ranked_engine_ids`` (real runtime discovery, no hardcoded engine
-lists). For each engine, captures separated CPU build time, GPU kernel time,
+Uses explicit ``--engine`` IDs in caller order when provided; otherwise
+discovers ranked engine IDs for the graph via ``Graph.get_ranked_engine_ids``.
+For each engine, captures separated CPU build time, GPU kernel time,
 and E2E wall-clock time. Performs correctness validation by comparing GPU
 output against a reference provider via ArrayComparator.
 """
@@ -78,6 +78,39 @@ def _resolve_engine_name(engine_id: int) -> str:
     return f"engine_{engine_id:#x}"
 
 
+def set_plugin_path(
+    hipdnn: Any, plugin_path: Optional[Path], loading_mode: Optional[Any] = None
+) -> None:
+    """Set the process-wide hipDNN plugin search path for the next handle."""
+    if plugin_path is None:
+        return
+    paths = [str(plugin_path)]
+    if loading_mode is None:
+        hipdnn.set_engine_plugin_paths(paths)
+    else:
+        hipdnn.set_engine_plugin_paths(paths, loading_mode)
+
+
+def _engine_setup_error_result(
+    provider: str,
+    engine_id: int,
+    plugin_path: Optional[Path],
+    config: SuiteConfig,
+    error_message: str,
+) -> ProviderEngineResult:
+    """Build a per-engine error row for plugin-path/handle setup failures."""
+    return ProviderEngineResult(
+        provider=provider,
+        engine_id=engine_id,
+        status="error",
+        plugin_path=str(plugin_path) if plugin_path is not None else None,
+        error_message=error_message,
+        correctness=CorrectnessResult.failed(
+            rtol=config.rtol, atol=config.atol, error_message=error_message
+        ),
+    )
+
+
 def _get_reference_provider(
     config: SuiteConfig, graph_json: Dict[str, Any]
 ) -> Optional[ReferenceProvider]:
@@ -251,59 +284,64 @@ def run_graph_all_providers(
 
     validation_requested = config.reference_provider != "none"
 
-    # Discover engines via real backend heuristics. A discovery failure
-    # is a graph-level error (record it and stop iterating engines), but
-    # "no engine configurations available" / "not supported" messages are
-    # really an unsupported-graph signal -- record as skipped so the
-    # suite exit code stays 0 when nothing is wrong, just nothing to run.
-    discovery_config = BenchmarkConfig(
-        graph_path=graph_path,
-        warmup_iters=config.warmup_iters,
-        benchmark_iters=config.benchmark_iters,
-    )
-    try:
-        discovery_executor = Executor(
-            graph_json_str=graph_json_str,
-            config=discovery_config,
-            gpu_backend=config.gpu_backend,
-        )
-        engine_ids = discovery_executor.discover_engines(handle)
-    except UnsupportedGraphError as e:
-        return GraphResult(
-            graph_name=graph_name,
-            graph_path=str(graph_path),
-            results=[
-                ProviderEngineResult(
-                    provider="unknown",
-                    engine_id=0,
-                    status="skipped",
-                    skip_reason=str(e),
-                    correctness=CorrectnessResult.failed(
-                        rtol=config.rtol, atol=config.atol, error_message=str(e)
-                    ),
-                )
-            ],
-        )
-    except (ExecutionError, RuntimeError) as e:
-        msg = str(e)
-        return GraphResult(
-            graph_name=graph_name,
-            graph_path=str(graph_path),
-            results=[
-                ProviderEngineResult(
-                    provider="unknown",
-                    engine_id=0,
-                    status="error",
-                    error_message=f"Engine discovery failed: {msg}",
-                    correctness=CorrectnessResult.failed(
-                        rtol=config.rtol, atol=config.atol, error_message=msg
-                    ),
-                )
-            ],
+    if config.engine_filter is not None:
+        # Explicit --engine is a selection, not a post-discovery filter. Keep the
+        # caller's order so per-engine plugin paths are deterministic.
+        engine_ids = list(config.engine_filter)
+    else:
+        # Discover engines via real backend heuristics. A discovery failure is a
+        # graph-level error (record it and stop iterating engines), but "no
+        # engine configurations available" / "not supported" messages are an
+        # unsupported-graph signal and are recorded as skipped, not as errors.
+        discovery_config = BenchmarkConfig(
+            graph_path=graph_path,
+            warmup_iters=config.warmup_iters,
+            benchmark_iters=config.benchmark_iters,
         )
+        try:
+            if handle is None:
+                import hipdnn_frontend as hipdnn
 
-    if config.engine_filter is not None:
-        engine_ids = [e for e in engine_ids if e in config.engine_filter]
+                handle = hipdnn.Handle()
+            discovery_executor = Executor(
+                graph_json_str=graph_json_str,
+                config=discovery_config,
+                gpu_backend=config.gpu_backend,
+            )
+            engine_ids = discovery_executor.discover_engines(handle)
+        except UnsupportedGraphError as e:
+            return GraphResult(
+                graph_name=graph_name,
+                graph_path=str(graph_path),
+                results=[
+                    ProviderEngineResult(
+                        provider="unknown",
+                        engine_id=0,
+                        status="skipped",
+                        skip_reason=str(e),
+                        correctness=CorrectnessResult.failed(
+                            rtol=config.rtol, atol=config.atol, error_message=str(e)
+                        ),
+                    )
+                ],
+            )
+        except (ExecutionError, RuntimeError) as e:
+            msg = str(e)
+            return GraphResult(
+                graph_name=graph_name,
+                graph_path=str(graph_path),
+                results=[
+                    ProviderEngineResult(
+                        provider="unknown",
+                        engine_id=0,
+                        status="error",
+                        error_message=f"Engine discovery failed: {msg}",
+                        correctness=CorrectnessResult.failed(
+                            rtol=config.rtol, atol=config.atol, error_message=msg
+                        ),
+                    )
+                ],
+            )
 
     if not engine_ids:
         return GraphResult(
@@ -317,12 +355,12 @@ def run_graph_all_providers(
                     error_message=(
                         "No engines discovered for graph"
                         if config.engine_filter is None
-                        else "No discovered engines matched --engine filter"
+                        else "No engines selected for graph"
                     ),
                 )
             ],
         )
-
+    engine_selections = config.engine_selections_for(engine_ids)
     ref_provider = _get_reference_provider(config, graph_json)
 
     # Compute analytical metrics once per graph — they're a function of
@@ -343,20 +381,50 @@ def run_graph_all_providers(
             warn_once("analytical", f"compute_io_bytes failed for {graph_name}: {e}")
 
     pe_results: List[ProviderEngineResult] = []
-    for engine_id in engine_ids:
+    for selection in engine_selections:
+        engine_id = selection.engine_id
+        engine_plugin_path = selection.plugin_path
         engine_name = _resolve_engine_name(engine_id)
-        if reporter is not None:
-            reporter.print_engine_start(engine_name)
+        engine_handle = handle
         with Timer() as t:
-            pe_result = _run_single_provider_engine(
+            if engine_handle is None:
+                try:
+                    import hipdnn_frontend as hipdnn
+
+                    set_plugin_path(
+                        hipdnn,
+                        engine_plugin_path,
+                        hipdnn.PluginLoadingMode.ABSOLUTE,
+                    )
+                    engine_handle = hipdnn.Handle()
+                except (ImportError, RuntimeError, ValueError, OSError) as e:
+                    pe_result = _engine_setup_error_result(
+                        provider=engine_name,
+                        engine_id=engine_id,
+                        plugin_path=engine_plugin_path,
+                        config=config,
+                        error_message=str(e),
+                    )
+                    pe_result.elapsed_time_ms = t.elapsed_ms
+                    if reporter is not None:
+                        reporter.print_engine_start(engine_name)
+                        reporter.print_engine_result(pe_result)
+                    pe_results.append(pe_result)
+                    continue
+
+            if reporter is not None:
+                reporter.print_engine_start(engine_name)
+
+            pe_result = run_single_provider_engine(
                 graph_path=graph_path,
                 graph_json_str=graph_json_str,
                 graph_name=graph_name,
                 tensor_infos=tensor_infos,
                 config=config,
-                handle=handle,
+                handle=engine_handle,
                 provider=engine_name,
                 engine_id=engine_id,
+                plugin_path=engine_plugin_path,
                 ref_provider=ref_provider,
                 validation_requested=validation_requested,
                 graph_json=graph_json,
@@ -365,6 +433,8 @@ def run_graph_all_providers(
                 analytical_io_bytes=analytical_io_bytes,
             )
         pe_result.elapsed_time_ms = t.elapsed_ms
+        if engine_plugin_path is not None:
+            pe_result.plugin_path = str(engine_plugin_path)
         if reporter is not None:
             reporter.print_engine_result(pe_result)
         pe_results.append(pe_result)
@@ -388,7 +458,7 @@ def _collect_basic_metrics_post_loop(
     """Populate the basic always-on metric fields on ``result``.
 
     Called once after the timed loop when ``metrics.tier == "basic"``.
-    Pulled out of :func:`_run_single_provider_engine` to keep that
+    Pulled out of :func:`run_single_provider_engine` to keep that
     function focused on the timed loop itself; the basic-tier book
     keeping is otherwise just a long sequence of conditionals on
     intermediate results.
@@ -437,7 +507,7 @@ def _collect_basic_metrics_post_loop(
         warn_once("gpu_smi", f"vram snapshot failed: {e}")
 
 
-def _run_single_provider_engine(
+def run_single_provider_engine(
     graph_path: Path,
     graph_json_str: str,
     graph_name: str,
@@ -446,6 +516,7 @@ def _run_single_provider_engine(
     handle: Any,
     provider: str,
     engine_id: int,
+    plugin_path: Optional[Path],
     ref_provider: Optional[ReferenceProvider],
     validation_requested: bool,
     graph_json: Dict[str, Any],
@@ -586,7 +657,7 @@ def _run_single_provider_engine(
                     warmup_iters=config.warmup_iters,
                     benchmark_iters=config.benchmark_iters,
                     metrics_config=config.metrics,
-                    plugin_path=config.plugin_path,
+                    plugin_path=plugin_path,
                 )
                 if extra:
                     result.extra_metrics = extra
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py
index 9a20c77feb53..321b1ccfc46f 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/graph/resolver.py
@@ -41,7 +41,9 @@ def extract_tarball(tarball_path: str) -> Tuple[tempfile.TemporaryDirectory, Lis
     tmpdir = tempfile.TemporaryDirectory(prefix="dnn_benchmarking_")
     try:
         with tarfile.open(tarball_path) as tf:
-            json_members = [m for m in tf.getmembers() if m.name.endswith(".json")]
+            json_members = [
+                m for m in tf.getmembers() if m.name.endswith(".json") and m.isfile()
+            ]
             if not json_members:
                 raise GraphLoadError(f"No .json files found in tarball: {tarball_path}")
             tf.extractall(path=tmpdir.name, members=json_members, filter="data")
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py
index 53633ab976ed..698c07c9be71 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/reporter.py
@@ -5,9 +5,9 @@
 
 import sys
 from pathlib import Path
-from typing import Any, List, Optional, TextIO
+from typing import List, Optional, TextIO
 
-from ..config.benchmark_config import ABTestConfig, BenchmarkConfig, SuiteConfig
+from ..config.benchmark_config import BenchmarkConfig, SuiteConfig
 from .statistics import BenchmarkStats, CombinedBenchmarkStats
 from .suite_results import (
     CorrectnessResult,
@@ -127,6 +127,7 @@ def _print_stats_block(self, stats: BenchmarkStats) -> None:
             stats: Benchmark statistics.
         """
         self._print(f"  Mean:                 {stats.mean_ms:.3f} ms")
+        self._print(f"  Median:               {stats.median_ms:.3f} ms")
         self._print(f"  Std Dev:              {stats.std_ms:.3f} ms")
         self._print(f"  Min:                  {stats.min_ms:.3f} ms")
         self._print(f"  Max:                  {stats.max_ms:.3f} ms")
@@ -182,242 +183,6 @@ def _print_line(self, char: str) -> None:
         """
         print(char * self.WIDTH, file=self._output)
 
-    # A/B Testing Methods
-
-    def print_ab_header(
-        self, config: BenchmarkConfig, ab_config: ABTestConfig, graph_name: str
-    ) -> None:
-        """Print A/B test configuration header.
-
-        Args:
-            config: Benchmark configuration.
-            ab_config: A/B test configuration.
-            graph_name: Name of the graph being benchmarked.
-        """
-        self._print_line("=")
-        self._print(f"hipDNN A/B Test: {graph_name}")
-        self._print_line("=")
-        self._print(f"Graph:      {config.graph_path}")
-        self._print(f"Warmup:     {config.warmup_iters} iterations")
-        self._print(f"Benchmark:  {config.benchmark_iters} iterations")
-        self._print_line("-")
-        self._print("Configuration A:")
-        if ab_config.a_path:
-            self._print(f"  Plugin Path: {ab_config.a_path}")
-        else:
-            self._print("  Plugin Path: (default)")
-        self._print(f"  Engine ID:   {ab_config.a_id}")
-        self._print("Configuration B:")
-        if ab_config.b_path:
-            self._print(f"  Plugin Path: {ab_config.b_path}")
-        else:
-            self._print("  Plugin Path: (default)")
-        self._print(f"  Engine ID:   {ab_config.b_id}")
-        self._print_line("-")
-        self._print("")
-
-    def print_ab_stats(
-        self,
-        stats_a: BenchmarkStats,
-        stats_b: BenchmarkStats,
-        init_time_a_ms: float,
-        init_time_b_ms: float,
-    ) -> None:
-        """Print side-by-side comparison of A vs B statistics.
-
-        Args:
-            stats_a: Statistics for configuration A.
-            stats_b: Statistics for configuration B.
-            init_time_a_ms: Init time for A in milliseconds.
-            init_time_b_ms: Init time for B in milliseconds.
-        """
-        # Header
-        self._print(f"{'':20} {'A':>15} {'B':>15}")
-        self._print_line("-")
-
-        # Init times
-        self._print(
-            f"{'Init Time:':20} {init_time_a_ms:>12.2f} ms {init_time_b_ms:>12.2f} ms"
-        )
-
-        # Execution stats
-        self._print(
-            f"{'Mean:':20} {stats_a.mean_ms:>12.3f} ms {stats_b.mean_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Std Dev:':20} {stats_a.std_ms:>12.3f} ms {stats_b.std_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Min:':20} {stats_a.min_ms:>12.3f} ms {stats_b.min_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Max:':20} {stats_a.max_ms:>12.3f} ms {stats_b.max_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'P95:':20} {stats_a.p95_ms:>12.3f} ms {stats_b.p95_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'P99:':20} {stats_a.p99_ms:>12.3f} ms {stats_b.p99_ms:>12.3f} ms"
-        )
-        self._print_line("-")
-
-        # Calculate speedup
-        if stats_a.mean_ms > 0 and stats_b.mean_ms > 0:
-            if stats_a.mean_ms > stats_b.mean_ms:
-                speedup = (stats_a.mean_ms - stats_b.mean_ms) / stats_a.mean_ms * 100
-                self._print(f"Speedup:            B is {speedup:.1f}% faster")
-            elif stats_b.mean_ms > stats_a.mean_ms:
-                speedup = (stats_b.mean_ms - stats_a.mean_ms) / stats_b.mean_ms * 100
-                self._print(f"Speedup:            A is {speedup:.1f}% faster")
-            else:
-                self._print("Speedup:            A and B are equal")
-
-        self._print("")
-
-    def print_ab_combined_stats(
-        self,
-        stats_a: CombinedBenchmarkStats,
-        stats_b: CombinedBenchmarkStats,
-        init_time_a_ms: float,
-        init_time_b_ms: float,
-    ) -> None:
-        """Print side-by-side comparison of A vs B with both E2E and kernel stats.
-
-        Args:
-            stats_a: Combined statistics for configuration A.
-            stats_b: Combined statistics for configuration B.
-            init_time_a_ms: Init time for A in milliseconds.
-            init_time_b_ms: Init time for B in milliseconds.
-        """
-        # E2E Stats section
-        self._print("E2E Execution Statistics:")
-        self._print(f"{'':20} {'A':>15} {'B':>15}")
-        self._print_line("-")
-
-        # Init times
-        self._print(
-            f"{'Init Time:':20} {init_time_a_ms:>12.2f} ms {init_time_b_ms:>12.2f} ms"
-        )
-
-        # E2E execution stats
-        self._print_ab_stats_block(stats_a.e2e_stats, stats_b.e2e_stats)
-        self._print("")
-
-        # Kernel Stats section (if available)
-        if stats_a.kernel_stats and stats_b.kernel_stats:
-            self._print("Kernel Execution Statistics:")
-            self._print(f"{'':20} {'A':>15} {'B':>15}")
-            self._print_line("-")
-            self._print_ab_stats_block(stats_a.kernel_stats, stats_b.kernel_stats)
-            self._print("")
-
-            # Calculate kernel speedup
-            ka, kb = stats_a.kernel_stats, stats_b.kernel_stats
-            if ka.mean_ms > 0 and kb.mean_ms > 0:
-                if ka.mean_ms > kb.mean_ms:
-                    speedup = (ka.mean_ms - kb.mean_ms) / ka.mean_ms * 100
-                    self._print(f"Kernel Speedup:     B is {speedup:.1f}% faster")
-                elif kb.mean_ms > ka.mean_ms:
-                    speedup = (kb.mean_ms - ka.mean_ms) / kb.mean_ms * 100
-                    self._print(f"Kernel Speedup:     A is {speedup:.1f}% faster")
-                else:
-                    self._print("Kernel Speedup:     A and B are equal")
-                self._print("")
-        else:
-            self._print("Kernel Timing: Not available")
-            self._print("")
-
-    def _print_ab_stats_block(
-        self, stats_a: BenchmarkStats, stats_b: BenchmarkStats
-    ) -> None:
-        """Print a side-by-side statistics block for A/B comparison.
-
-        Args:
-            stats_a: Statistics for configuration A.
-            stats_b: Statistics for configuration B.
-        """
-        self._print(
-            f"{'Mean:':20} {stats_a.mean_ms:>12.3f} ms {stats_b.mean_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Std Dev:':20} {stats_a.std_ms:>12.3f} ms {stats_b.std_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Min:':20} {stats_a.min_ms:>12.3f} ms {stats_b.min_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'Max:':20} {stats_a.max_ms:>12.3f} ms {stats_b.max_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'P95:':20} {stats_a.p95_ms:>12.3f} ms {stats_b.p95_ms:>12.3f} ms"
-        )
-        self._print(
-            f"{'P99:':20} {stats_a.p99_ms:>12.3f} ms {stats_b.p99_ms:>12.3f} ms"
-        )
-
-    def print_ab_comparison(
-        self,
-        passed: bool,
-        max_abs_diff: float,
-        max_rel_diff: float,
-        rtol: float,
-        atol: float,
-    ) -> None:
-        """Print A/B accuracy comparison result.
-
-        Args:
-            passed: Whether comparison passed.
-            max_abs_diff: Maximum absolute difference.
-            max_rel_diff: Maximum relative difference.
-            rtol: Relative tolerance used.
-            atol: Absolute tolerance used.
-        """
-        status = "PASSED" if passed else "FAILED"
-        self._print(f"Accuracy Comparison: {status}")
-        self._print(f"  (rtol={rtol:.0e}, atol={atol:.0e})")
-        if not passed:
-            self._print(f"  Max abs diff: {max_abs_diff:.2e}")
-            self._print(f"  Max rel diff: {max_rel_diff:.2e}")
-
-    def print_ab_validation(
-        self,
-        validation_a: Optional[Any],
-        validation_b: Optional[Any],
-        rtol: float,
-        atol: float,
-    ) -> None:
-        """Print reference validation results for A/B test.
-
-        Args:
-            validation_a: ValidationResult for configuration A, or None.
-            validation_b: ValidationResult for configuration B, or None.
-            rtol: Relative tolerance used.
-            atol: Absolute tolerance used.
-        """
-        if validation_a is None and validation_b is None:
-            return
-
-        self._print("")
-        self._print("Reference Validation:")
-
-        if validation_a is not None:
-            status_a = "PASSED" if validation_a.passed else "FAILED"
-            self._print(f"  Config A vs {validation_a.provider_name}: {status_a}")
-            if not validation_a.passed:
-                self._print(f"    Max abs diff: {validation_a.max_abs_diff:.2e}")
-                self._print(f"    Max rel diff: {validation_a.max_rel_diff:.2e}")
-
-        if validation_b is not None:
-            status_b = "PASSED" if validation_b.passed else "FAILED"
-            self._print(f"  Config B vs {validation_b.provider_name}: {status_b}")
-            if not validation_b.passed:
-                self._print(f"    Max abs diff: {validation_b.max_abs_diff:.2e}")
-                self._print(f"    Max rel diff: {validation_b.max_rel_diff:.2e}")
-
-        self._print(f"  (rtol={rtol:.0e}, atol={atol:.0e})")
-
-    # Reference Validation Methods
-
     # Suite Methods
 
     def print_hipdnn_init_start(self) -> None:
@@ -602,6 +367,66 @@ def _pe_outcome(pe: ProviderEngineResult) -> str:
             return "skipped"
         return "errored"
 
+    def print_graph_result_table(self, graph_result: GraphResult) -> None:
+        """Render one compact summary row per engine for a graph."""
+        if not graph_result.results:  # nothing to tabulate: print no header either
+            return
+
+        include_plugin = any(pe.plugin_path for pe in graph_result.results)
+        headers = ["engine", "status"]
+        if include_plugin:  # column appears only when some result carries a path
+            headers.append("plugin_path")
+        headers.extend(
+            [
+                "kernel_mean_ms",
+                "kernel_median_ms",
+                "e2e_mean_ms",
+                "e2e_median_ms",
+            ]
+        )
+        rows: List[List[str]] = []
+        for pe in graph_result.results:
+            row = [pe.provider, self._pe_status(pe)]
+            if include_plugin:
+                row.append(pe.plugin_path or "")  # blank cell when this result has none
+            row.extend(
+                [
+                    self._fmt_stat(pe.gpu_kernel_stats, "mean_ms"),
+                    self._fmt_stat(pe.gpu_kernel_stats, "median_ms"),
+                    self._fmt_stat(pe.e2e_stats, "mean_ms"),
+                    self._fmt_stat(pe.e2e_stats, "median_ms"),
+                ]
+            )
+            rows.append(row)
+
+        widths = [  # per column: the longest of its header and all of its cells
+            max(len(headers[i]), *(len(row[i]) for row in rows))
+            for i in range(len(headers))
+        ]
+        self._print("Results:")
+        self._print("  " + "  ".join(h.ljust(widths[i]) for i, h in enumerate(headers)))
+        self._print("  " + "  ".join("-" * width for width in widths))  # dashed rule
+        for row in rows:
+            self._print(
+                "  " + "  ".join(row[i].ljust(widths[i]) for i in range(len(row)))
+            )
+        self._print("")  # blank line closes the table
+
+    @staticmethod
+    def _pe_status(pe: ProviderEngineResult) -> str:  # status cell for one table row
+        if pe.status != "success":
+            return pe.status  # any non-success status is shown verbatim
+        if pe.correctness is not None and pe.correctness.tolerance_match is False:
+            return "failed"  # only an explicit False fails; None/absent still passes
+        return "passed"
+
+    @staticmethod
+    def _fmt_stat(stats: Optional[BenchmarkStats], name: str) -> str:  # one table cell
+        if stats is None:
+            return "n/a"  # placeholder when no stats object is attached
+        value = getattr(stats, name)  # name is an attribute of stats, e.g. "mean_ms"
+        return f"{value:.3f}"  # fixed three decimal places
+
     def print_verbose_graph_result(
         self, graph_result: GraphResult, suite_config: SuiteConfig
     ) -> None:
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py
index ab365ddb8034..5ee5f7284939 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/statistics.py
@@ -29,6 +29,7 @@ class BenchmarkStats:
 
     Attributes:
         mean_ms: Mean execution time in milliseconds.
+        median_ms: Median execution time in milliseconds.
         std_ms: Standard deviation of execution time in milliseconds.
         min_ms: Minimum execution time in milliseconds.
         max_ms: Maximum execution time in milliseconds.
@@ -44,6 +45,7 @@ class BenchmarkStats:
     p95_ms: float
     p99_ms: float
     total_ms: float = 0.0
+    median_ms: float = 0.0
 
     @classmethod
     def from_timings(cls, timings: List[float]) -> "BenchmarkStats":
@@ -65,6 +67,7 @@ def from_timings(cls, timings: List[float]) -> "BenchmarkStats":
 
         return cls(
             mean_ms=float(np.mean(arr)),
+            median_ms=float(np.median(arr)),
             std_ms=float(np.std(arr, ddof=1)) if len(arr) > 1 else 0.0,
             min_ms=float(np.min(arr)),
             max_ms=float(np.max(arr)),
@@ -77,6 +80,7 @@ def to_dict(self) -> Dict[str, float]:
         """Convert to dictionary for JSON serialization."""
         return {
             "mean_ms": self.mean_ms,
+            "median_ms": self.median_ms,
             "std_ms": self.std_ms,
             "min_ms": self.min_ms,
             "max_ms": self.max_ms,
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py
index 89858fda5a34..7484e29a23ad 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/reporting/suite_results.py
@@ -151,6 +151,7 @@ class ProviderEngineResult:
     provider: str
     engine_id: int
     status: Literal["success", "error", "skipped"]
+    plugin_path: Optional[str] = None
     cpu_build_time_ms: Optional[float] = None
     gpu_kernel_stats: Optional[BenchmarkStats] = None
     e2e_stats: Optional[BenchmarkStats] = None
@@ -195,6 +196,8 @@ def to_dict(self) -> Dict[str, Any]:
             "engine_id": self.engine_id,
             "status": self.status,
         }
+        if self.plugin_path is not None:
+            d["plugin_path"] = self.plugin_path
         # extra_metrics is exclusively populated by the opt-in
         # profiling orchestrator, which the suite runner only fires on
         # the success path. Asserting the invariant here makes it
@@ -207,7 +210,7 @@ def to_dict(self) -> Dict[str, Any]:
                 f"extra_metrics is set on status={self.status!r}; "
                 "the orchestrator only runs on success today, so this "
                 "indicates either a new caller or a regression in the "
-                "success-gating in suite_runner._run_single_provider_engine"
+                "success-gating in suite_runner.run_single_provider_engine"
             )
         if self.status == "success":
             d["cpu_build_time_ms"] = self.cpu_build_time_ms
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py
index ea728edb7298..64b9c6123c5c 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/comparison.py
@@ -3,7 +3,7 @@
 
 """Unified comparison logic for array validation.
 
-Extracts comparison logic used by both A/B testing and reference validation.
+Shared by reference validation and any direct array comparisons.
 """
 
 from dataclasses import dataclass
@@ -33,7 +33,7 @@ class ArrayComparator:
     """Compares numpy arrays with tolerance-based matching.
 
     Handles NaN/Inf detection, shape validation, and difference calculation.
-    Used by both A/B testing and reference validation.
+    Used by reference validation and direct output comparisons.
     """
 
     def __init__(self, rtol: float = 1e-5, atol: float = 1e-8) -> None:
diff --git a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py
index 4b473f76180a..840ee343320d 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/src/dnn_benchmarking/validation/providers/cpu_plugin_provider.py
@@ -60,7 +60,7 @@ def compute_reference(
         """Compute reference using CPU plugin.
 
         This would use the same execution path as GPU but with CPU engine.
-        Similar to how ABRunner runs two configurations.
+        Similar to running the same graph through two engine selections.
 
         Args:
             graph_json: The graph as a parsed JSON dictionary.
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py
index fa2a41697c46..7d3377de876b 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_internal_profiling.py
@@ -4,7 +4,7 @@
 """Tests for the hidden --internal-profiling-run sub-mode.
 
 The sub-mode must short-circuit gpu_check, skip Reporter output, and
-delegate to suite_runner._run_single_provider_engine for the named
+delegate to suite_runner.run_single_provider_engine for the named
 (graph, engine). These tests focus on the wiring (parser flags,
 quiet reporter, error paths) rather than running an actual workload.
 """
@@ -65,7 +65,7 @@ def test_missing_graph_or_engine_returns_error(self, capsys):
 
 class TestRunInternalProfilingSuccessPath:
     """Positive-path coverage. Mocks hipdnn_frontend + GraphLoader +
-    _run_single_provider_engine so the test stays hermetic on a CI box
+    run_single_provider_engine so the test stays hermetic on a CI box
     with no ROCm or GPU. Verifies the wiring the profiler relies on:
     MetricsConfig(tier='off'), plugin_path forwarding, single-engine
     filter, and that a success result returns rc=0."""
@@ -85,7 +85,7 @@ def _success_args(self, tmp_path, plugin_path=None):
     def _patch_stack(self, monkeypatch, captured):
         """Wire up the three external dependencies as MagicMocks.
 
-        Records the SuiteConfig that _run_single_provider_engine is
+        Records the SuiteConfig that run_single_provider_engine is
         called with so the test can assert on tier / engine_filter /
         plugin_path forwarding.
         """
@@ -109,7 +109,7 @@ def fake_run(**kwargs):
             result.status = "success"
             return result
 
-        monkeypatch.setattr(internal_profiling, "_run_single_provider_engine", fake_run)
+        monkeypatch.setattr(internal_profiling, "run_single_provider_engine", fake_run)
 
     def test_success_builds_tier_off_suite_config_and_returns_zero(
         self, tmp_path, monkeypatch
@@ -139,7 +139,7 @@ def test_success_forwards_plugin_path(self, tmp_path, monkeypatch):
         self._patch_stack(monkeypatch, captured)
         plugin = tmp_path / "plugin.so"
         rc = internal_profiling.run_internal_profiling(
-            self._success_args(tmp_path, plugin_path=plugin)
+            self._success_args(tmp_path, plugin_path=[plugin])
         )
         assert rc == 0
         # Two forwarding paths must both fire: set_engine_plugin_paths
@@ -151,6 +151,20 @@ def test_success_forwards_plugin_path(self, tmp_path, monkeypatch):
         cfg: SuiteConfig = captured["run_kwargs"]["config"]
         assert cfg.plugin_path == plugin
 
+    def test_multiple_plugin_paths_return_error(self, tmp_path, monkeypatch, capsys):
+        captured: dict = {}  # required by _patch_stack; not inspected by this test
+        self._patch_stack(monkeypatch, captured)
+
+        rc = internal_profiling.run_internal_profiling(
+            self._success_args(
+                tmp_path,
+                plugin_path=[tmp_path / "plugin-a", tmp_path / "plugin-b"],  # two paths
+            )
+        )
+
+        assert rc == 1  # the call must report failure
+        assert "expected exactly one --plugin-path" in capsys.readouterr().err  # on stderr
+
     def test_non_success_status_returns_one(self, tmp_path, monkeypatch, capsys):
         from unittest.mock import MagicMock
 
@@ -165,7 +179,7 @@ def test_non_success_status_returns_one(self, tmp_path, monkeypatch, capsys):
         # in place — we only need to swap the runner.
         monkeypatch.setattr(
             internal_profiling,
-            "_run_single_provider_engine",
+            "run_single_provider_engine",
             lambda **kw: bad_result,
         )
 
@@ -182,7 +196,7 @@ def test_execution_exception_returns_one(self, tmp_path, monkeypatch, capsys):
         def raising(**kw):
             raise RuntimeError("kernel exploded")
 
-        monkeypatch.setattr(internal_profiling, "_run_single_provider_engine", raising)
+        monkeypatch.setattr(internal_profiling, "run_single_provider_engine", raising)
 
         rc = internal_profiling.run_internal_profiling(self._success_args(tmp_path))
         assert rc == 1
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py
index c4b1c5c1bca8..39ee83ceecc6 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/cli/test_suite_cli.py
@@ -80,14 +80,21 @@ def test_engine_flag_default_none(self) -> None:
         args = parser.parse_args(["--graph", "g.json"])
         assert args.engine is None
 
-    def test_engine_flag_deduplicates_preserving_order(self) -> None:
-        """--engine 1,1,1 -> [1]; '3,1,3,2' -> [3, 1, 2] (first-seen order)."""
+    def test_engine_flag_preserves_duplicates(self) -> None:
+        """--engine entries are ordered execution selections, not a set."""
         parser = create_parser()
         args = parser.parse_args(["--graph", "g.json", "--engine", "1,1,1"])
-        assert args.engine == [1]
+        assert args.engine == [1, 1, 1]
 
         args = parser.parse_args(["--graph", "g.json", "--engine", "3,1,3,2"])
-        assert args.engine == [3, 1, 2]
+        assert args.engine == [3, 1, 3, 2]
+
+    def test_plugin_path_accepts_comma_separated_list(self) -> None:
+        parser = create_parser()
+        args = parser.parse_args(
+            ["--graph", "g.json", "--plugin-path", "/plugins/a,/plugins/b"]
+        )
+        assert args.plugin_path == [Path("/plugins/a"), Path("/plugins/b")]  # split, in order
 
     def test_verbose_flag_default_false(self) -> None:
         """No -v / --verbose => args.verbose is False."""
@@ -214,6 +221,92 @@ def test_engine_list_propagates_to_suite_config(
         suite_config = mock_benchmark.call_args.kwargs["config"]
         assert suite_config.engine_filter == [1, 2]
 
+    @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
+    @patch("dnn_benchmarking.cli.suite_runner_cli.run_suite_benchmark")
+    def test_plugin_paths_propagate_to_suite_config(
+        self, mock_benchmark: MagicMock, mock_gpu: MagicMock
+    ) -> None:
+        mock_benchmark.return_value = 0  # value the patched run_suite_benchmark returns
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            paths = self._create_graph_files(Path(tmpdir), 1)
+
+            from dnn_benchmarking.cli.main import main
+
+            with patch(
+                "sys.argv",
+                [
+                    "dnn-benchmark",
+                    "--graph",
+                    paths[0],
+                    "--engine",
+                    "2,1",
+                    "--plugin-path",
+                    "/plugins/b,/plugins/a",
+                ],
+            ):
+                main()
+
+        suite_config = mock_benchmark.call_args.kwargs["config"]  # config handed to the runner
+        assert suite_config.engine_filter == [2, 1]  # command-line order is kept
+        assert suite_config.plugin_paths == [Path("/plugins/b"), Path("/plugins/a")]
+
+    @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
+    @patch("dnn_benchmarking.cli.suite_runner_cli.run_suite_benchmark")
+    def test_same_engine_plugin_paths_propagate_as_ordered_selections(
+        self, mock_benchmark: MagicMock, mock_gpu: MagicMock
+    ) -> None:
+        mock_benchmark.return_value = 0  # value the patched run_suite_benchmark returns
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            paths = self._create_graph_files(Path(tmpdir), 1)
+
+            from dnn_benchmarking.cli.main import main
+
+            with patch(
+                "sys.argv",
+                [
+                    "dnn-benchmark",
+                    "--graph",
+                    paths[0],
+                    "--engine",
+                    "1,1",
+                    "--plugin-path",
+                    "/plugins/a,/plugins/b",
+                ],
+            ):
+                main()
+
+        suite_config = mock_benchmark.call_args.kwargs["config"]  # config handed to the runner
+        selections = suite_config.engine_selections_for(suite_config.engine_filter)
+        assert suite_config.engine_filter == [1, 1]  # the repeated ID is not collapsed
+        assert [s.plugin_path for s in selections] == [
+            Path("/plugins/a"),
+            Path("/plugins/b"),
+        ]
+
+    def test_plugin_path_count_mismatch_rejected_at_cli_layer(self) -> None:
+        from dnn_benchmarking.cli.suite_runner_cli import run_suite_cli
+
+        parser = create_parser()
+        args = parser.parse_args(
+            [
+                "--graph",
+                "g.json",
+                "--engine",
+                "1,2,3",  # three engine entries ...
+                "--plugin-path",
+                "/plugins/a,/plugins/b",  # ... against two plugin paths
+            ]
+        )
+        reporter = MagicMock(spec=Reporter)  # mock so print_error calls can be inspected
+
+        rc = run_suite_cli(args, graph_paths=[Path("g.json")], reporter=reporter)
+
+        assert rc == 1
+        reporter.print_error.assert_called_once()
+        assert "entry count" in reporter.print_error.call_args[0][0]  # first positional arg
+
     @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
     @patch("dnn_benchmarking.cli.main.run_pytorch_cli")
     @patch("dnn_benchmarking.cli.main.run_suite_cli")
@@ -414,7 +507,6 @@ def test_all_pass_returns_zero_exit_code(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -445,7 +537,6 @@ def test_one_failure_still_processes_second(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -491,7 +582,6 @@ def test_correctness_failure_returns_two(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -520,7 +610,6 @@ def test_json_output_written_when_output_specified(
                 graph_paths=paths,
                 config=config,
                 output_path=output_file,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -554,7 +643,6 @@ def test_no_json_output_when_output_not_specified(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -584,7 +672,6 @@ def test_warmup_iters_passed_per_graph(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -623,7 +710,6 @@ def test_empty_nodes_graph_records_error_and_continues(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -656,7 +742,6 @@ def test_graph_load_error_continues_to_next(
                 graph_paths=paths,
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -664,63 +749,14 @@ def test_graph_load_error_continues_to_next(
         assert result == 1
 
 
-class TestEngineFlagModeRejection:
-    """--engine list is incompatible with A/B and PyTorch single-engine modes."""
+class TestBackendEngineRouting:
+    """Tests for engine selection rules across execution backends."""
 
     def _create_graph(self, tmpdir: Path) -> Path:
         p = tmpdir / "g.json"
         p.write_text(json.dumps({"name": "g", "nodes": [], "tensors": []}))
         return p
 
-    @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
-    def test_engine_list_with_ab_mode_rejected(self, mock_gpu: MagicMock) -> None:
-        from dnn_benchmarking.cli.main import main
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            graph = self._create_graph(Path(tmpdir))
-            with patch(
-                "sys.argv",
-                [
-                    "dnn-benchmark",
-                    "--graph",
-                    str(graph),
-                    "--engine",
-                    "1,2",
-                    "--AId",
-                    "1",
-                    "--BId",
-                    "2",
-                ],
-            ):
-                result = main()
-        assert result == 1
-
-    @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
-    def test_single_engine_with_ab_mode_also_rejected(
-        self, mock_gpu: MagicMock
-    ) -> None:
-        """Even a single-element --engine list is rejected in A/B (it has --AId/--BId)."""
-        from dnn_benchmarking.cli.main import main
-
-        with tempfile.TemporaryDirectory() as tmpdir:
-            graph = self._create_graph(Path(tmpdir))
-            with patch(
-                "sys.argv",
-                [
-                    "dnn-benchmark",
-                    "--graph",
-                    str(graph),
-                    "--engine",
-                    "5",
-                    "--AId",
-                    "1",
-                    "--BId",
-                    "2",
-                ],
-            ):
-                result = main()
-        assert result == 1
-
     @patch("dnn_benchmarking.cli.main.gpu_is_available", return_value=True)
     def test_engine_list_with_pytorch_backend_rejected(
         self, mock_gpu: MagicMock
@@ -803,7 +839,6 @@ def test_unregistered_reference_provider_fails_at_startup(
                 graph_paths=[graph],
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -830,7 +865,6 @@ def test_unavailable_reference_provider_fails_at_startup(
                 graph_paths=[graph],
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
@@ -931,7 +965,6 @@ def test_available_reference_provider_proceeds_to_graph_iteration(
                 graph_paths=[graph],
                 config=config,
                 output_path=None,
-                plugin_path=None,
                 reporter=Reporter(),
             )
 
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py
index d695172b9c53..f9345b23567c 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/config/test_benchmark_config.py
@@ -7,7 +7,7 @@
 
 import pytest
 
-from dnn_benchmarking.config import ABTestConfig, BenchmarkConfig, ValidationConfig
+from dnn_benchmarking.config import BenchmarkConfig, SuiteConfig, ValidationConfig
 
 
 class TestBenchmarkConfig:
@@ -72,96 +72,61 @@ def test_negative_engine_id_accepted(self) -> None:
         assert config.engine_id == -4567890123456789012
 
 
-class TestABTestConfig:
-    """Tests for ABTestConfig dataclass."""
+class TestSuiteConfigPluginPaths:
+    """Tests for SuiteConfig engine/plugin path selection."""
 
-    def test_default_values(self) -> None:
-        """Test that defaults are applied correctly."""
-        config = ABTestConfig()
-
-        assert config.a_path is None
-        assert config.a_id == 1
-        assert config.b_path is None
-        assert config.b_id == 1
-        assert config.rtol == 1e-5
-        assert config.atol == 1e-8
-
-    def test_custom_values(self) -> None:
-        """Test that custom values are stored correctly."""
-        config = ABTestConfig(
-            a_path=Path("/path/to/pluginA"),
-            a_id=1,
-            b_path=Path("/path/to/pluginB"),
-            b_id=2,
-            rtol=1e-3,
-            atol=1e-6,
+    def test_single_plugin_path_applies_to_all_engines(self) -> None:
+        config = SuiteConfig(
+            engine_filter=[1, 2],
+            plugin_paths=[Path("/plugins/a")],
         )
-
-        assert config.a_path == Path("/path/to/pluginA")
-        assert config.a_id == 1
-        assert config.b_path == Path("/path/to/pluginB")
-        assert config.b_id == 2
-        assert config.rtol == 1e-3
-        assert config.atol == 1e-6
-
-    def test_string_path_converted_to_path(self) -> None:
-        """Test that string paths are converted to Path objects."""
-        config = ABTestConfig(
-            a_path="/path/to/pluginA",  # type: ignore
-            b_path="/path/to/pluginB",  # type: ignore
+        selections = config.engine_selections_for([1, 2])
+
+        assert [s.engine_id for s in selections] == [1, 2]
+        assert [s.plugin_path for s in selections] == [
+            Path("/plugins/a"),
+            Path("/plugins/a"),
+        ]
+        assert config.plugin_path == Path("/plugins/a")
+
+    def test_multiple_plugin_paths_follow_engine_order(self) -> None:
+        config = SuiteConfig(
+            engine_filter=[2, 1],
+            plugin_paths=[Path("/plugins/b"), Path("/plugins/a")],
+        )
+        selections = config.engine_selections_for([2, 1])
+
+        assert [s.engine_id for s in selections] == [2, 1]  # caller order, not sorted
+        assert [s.plugin_path for s in selections] == [
+            Path("/plugins/b"),
+            Path("/plugins/a"),
+        ]
+        assert config.plugin_path is None  # singular accessor is None with two paths
+
+    def test_repeated_engine_ids_keep_distinct_plugin_paths(self) -> None:
+        config = SuiteConfig(
+            engine_filter=[1, 1],
+            plugin_paths=[Path("/plugins/a"), Path("/plugins/b")],
         )
 
-        assert isinstance(config.a_path, Path)
-        assert isinstance(config.b_path, Path)
-        assert config.a_path == Path("/path/to/pluginA")
-        assert config.b_path == Path("/path/to/pluginB")
+        selections = config.engine_selections_for([1, 1])
 
-    def test_negative_ids_accepted(self) -> None:
-        """a_id / b_id may be negative (FNV-1a engine ID hashes)."""
-        config = ABTestConfig(a_id=-1, b_id=-2)
-        assert config.a_id == -1
-        assert config.b_id == -2
+        assert [s.engine_id for s in selections] == [1, 1]
+        assert [s.plugin_path for s in selections] == [
+            Path("/plugins/a"),
+            Path("/plugins/b"),
+        ]
 
-    def test_negative_rtol_raises(self) -> None:
-        """Test that negative rtol raises ValueError."""
-        with pytest.raises(ValueError, match="rtol must be non-negative"):
-            ABTestConfig(rtol=-1e-5)
+    def test_multiple_plugin_paths_require_engine_filter(self) -> None:
+        with pytest.raises(ValueError, match="requires --engine"):  # raised by the constructor
+            SuiteConfig(plugin_paths=[Path("/plugins/a"), Path("/plugins/b")])
 
-    def test_negative_atol_raises(self) -> None:
-        """Test that negative atol raises ValueError."""
-        with pytest.raises(ValueError, match="atol must be non-negative"):
-            ABTestConfig(atol=-1e-8)
-
-    def test_validate_paths_with_existing_paths(self, tmp_path: Path) -> None:
-        """Test validate_paths succeeds with existing paths."""
-        plugin_a = tmp_path / "pluginA"
-        plugin_b = tmp_path / "pluginB"
-        plugin_a.mkdir()
-        plugin_b.mkdir()
-
-        config = ABTestConfig(a_path=plugin_a, b_path=plugin_b)
-        # Should not raise
-        config.validate_paths()
-
-    def test_validate_paths_with_none_paths(self) -> None:
-        """Test validate_paths succeeds with None paths."""
-        config = ABTestConfig()
-        # Should not raise
-        config.validate_paths()
-
-    def test_validate_paths_nonexistent_a_path(self, tmp_path: Path) -> None:
-        """Test validate_paths raises for nonexistent a_path."""
-        config = ABTestConfig(a_path=tmp_path / "nonexistent")
-
-        with pytest.raises(ValueError, match="Plugin path A does not exist"):
-            config.validate_paths()
-
-    def test_validate_paths_nonexistent_b_path(self, tmp_path: Path) -> None:
-        """Test validate_paths raises for nonexistent b_path."""
-        config = ABTestConfig(b_path=tmp_path / "nonexistent")
-
-        with pytest.raises(ValueError, match="Plugin path B does not exist"):
-            config.validate_paths()
+    def test_plugin_path_count_must_match_engine_count(self) -> None:
+        with pytest.raises(ValueError, match="entry count"):
+            SuiteConfig(
+                engine_filter=[1, 2, 3],  # three engines ...
+                plugin_paths=[Path("/plugins/a"), Path("/plugins/b")],  # ... two paths
+            )
 
 
 class TestValidationConfig:
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py
index 940f6dfee34a..9503b9d130f9 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/execution/test_suite_runner.py
@@ -4,7 +4,7 @@
 """Unit tests for suite_runner module."""
 
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, call, patch
 
 import pytest
 
@@ -333,10 +333,22 @@ def test_no_engines_unsupported_error_recorded_as_skipped(self, mock_exec_cls):
         assert r.status == "skipped"
         assert "No engine configurations" in (r.skip_reason or "")
 
+    @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name")
+    @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider")
     @patch("dnn_benchmarking.execution.suite_runner.Executor")
-    def test_engine_filter_excludes_everything(self, mock_exec_cls):
-        """When engine_filter excludes every discovered engine, surface as error."""
+    @patch("dnn_benchmarking.execution.suite_runner.BufferManager")
+    def test_engine_filter_runs_explicit_id_without_discovery(
+        self,
+        mock_bm_cls,
+        mock_exec_cls,
+        mock_get_ref,
+        mock_resolve_name,
+    ):
+        """Explicit --engine IDs run in CLI order without discovery filtering."""
+        mock_resolve_name.side_effect = lambda eid: f"engine_{eid}"
+        mock_get_ref.return_value = None
         mock_exec_cls.side_effect = _make_exec_factory(engine_ids=[0, 1])
+        mock_bm_cls.return_value = _make_bm_mock()
 
         result = run_graph_all_providers(
             graph_path=Path("test.json"),
@@ -347,8 +359,8 @@ def test_engine_filter_excludes_everything(self, mock_exec_cls):
         )
 
         assert len(result.results) == 1
-        assert result.results[0].status == "error"
-        assert "filter" in result.results[0].error_message.lower()
+        assert result.results[0].status == "success"
+        assert result.results[0].engine_id == 99
 
 
 class TestSuiteConfigValidation:
@@ -454,7 +466,7 @@ def test_engine_filter_list_keeps_intersection(
         mock_get_ref,
         mock_resolve_name,
     ):
-        """engine_filter=[1, 3, 99]: engines 1 and 3 run; 99 (not discovered) is dropped."""
+        """engine_filter=[1, 3, 99] runs exactly those IDs in caller order."""
         mock_resolve_name.side_effect = lambda eid: f"engine_{eid}"
         mock_get_ref.return_value = None
 
@@ -469,8 +481,91 @@ def test_engine_filter_list_keeps_intersection(
             handle=MagicMock(),
         )
 
-        engine_ids = sorted(r.engine_id for r in result.results)
-        assert engine_ids == [1, 3]
+        engine_ids = [r.engine_id for r in result.results]
+        assert engine_ids == [1, 3, 99]
+
+    @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name")
+    @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider")
+    @patch("dnn_benchmarking.execution.suite_runner.Executor")
+    @patch("dnn_benchmarking.execution.suite_runner.BufferManager")
+    def test_same_engine_runs_with_distinct_plugin_paths(
+        self,
+        mock_bm_cls,
+        mock_exec_cls,
+        mock_get_ref,
+        mock_resolve_name,
+    ):
+        """Repeated engine IDs run separately, each with its own plugin path."""
+        mock_resolve_name.side_effect = lambda eid: f"engine_{eid}"
+        mock_get_ref.return_value = None
+        mock_exec_cls.side_effect = _make_exec_factory(has_kernel_timings=True)
+        mock_bm_cls.return_value = _make_bm_mock()
+        hipdnn = MagicMock()
+        hipdnn.PluginLoadingMode.ABSOLUTE = "absolute"
+        hipdnn.Handle.side_effect = [MagicMock(), MagicMock()]
+
+        with patch.dict("sys.modules", {"hipdnn_frontend": hipdnn}):
+            result = run_graph_all_providers(
+                graph_path=Path("test.json"),
+                graph_json=_make_graph_json(),
+                tensor_infos=[_make_tensor_info(1)],
+                config=_make_config(
+                    engine_filter=[1, 1],
+                    plugin_paths=[Path("/plugins/a"), Path("/plugins/b")],
+                ),
+                handle=None,
+            )
+
+        assert [r.engine_id for r in result.results] == [1, 1]
+        assert [r.plugin_path for r in result.results] == [
+            "/plugins/a",
+            "/plugins/b",
+        ]
+        hipdnn.set_engine_plugin_paths.assert_has_calls(
+            [
+                call(["/plugins/a"], "absolute"),
+                call(["/plugins/b"], "absolute"),
+            ]
+        )
+
+    @patch("dnn_benchmarking.execution.suite_runner._resolve_engine_name")
+    @patch("dnn_benchmarking.execution.suite_runner._get_reference_provider")
+    @patch("dnn_benchmarking.execution.suite_runner.Executor")
+    @patch("dnn_benchmarking.execution.suite_runner.BufferManager")
+    def test_per_engine_handle_creation_failure_records_error_result(
+        self,
+        mock_bm_cls,
+        mock_exec_cls,
+        mock_get_ref,
+        mock_resolve_name,
+    ):
+        """A later per-engine handle failure records an error row and continues."""
+        mock_resolve_name.side_effect = lambda eid: f"engine_{eid}"
+        mock_get_ref.return_value = None
+        mock_exec_cls.side_effect = _make_exec_factory(has_kernel_timings=True)
+        mock_bm_cls.return_value = _make_bm_mock()
+        hipdnn = MagicMock()
+        hipdnn.PluginLoadingMode.ABSOLUTE = "absolute"
+        hipdnn.Handle.side_effect = [MagicMock(), RuntimeError("bad plugin")]
+
+        with patch.dict("sys.modules", {"hipdnn_frontend": hipdnn}):
+            result = run_graph_all_providers(
+                graph_path=Path("test.json"),
+                graph_json=_make_graph_json(),
+                tensor_infos=[_make_tensor_info(1)],
+                config=_make_config(
+                    engine_filter=[1, 2],
+                    plugin_paths=[Path("/plugins/a"), Path("/plugins/b")],
+                ),
+                handle=None,
+            )
+
+        assert [r.status for r in result.results] == ["success", "error"]
+        assert result.results[0].plugin_path == "/plugins/a"
+        assert result.results[1].plugin_path == "/plugins/b"
+        assert "bad plugin" in (result.results[1].error_message or "")
+        assert result.results[1].correctness is not None
+        assert result.results[1].correctness.execution_success is False
 
 
 class TestNoRetryOnFailure:
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py
index ceaa11482c34..f71841d6e6a5 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_reporter.py
@@ -6,8 +6,9 @@
 import io
 from pathlib import Path
 
-from dnn_benchmarking.config import ABTestConfig, BenchmarkConfig
+from dnn_benchmarking.config import BenchmarkConfig
 from dnn_benchmarking.reporting import BenchmarkStats, Reporter
+from dnn_benchmarking.reporting.suite_results import GraphResult, ProviderEngineResult
 
 
 class TestReporter:
@@ -123,160 +124,89 @@ def test_print_error(self) -> None:
         assert "ERROR: Something went wrong" in result
 
 
-class TestReporterAB:
-    """Tests for Reporter A/B testing methods."""
+class TestReporterEngineTable:
+    """Tests for the compact per-engine result table."""
 
-    def test_print_ab_header(self) -> None:
-        """Test A/B header output format."""
+    def test_print_graph_result_table_without_comparison_columns(self) -> None:
         output = io.StringIO()
         reporter = Reporter(output=output)
-
-        config = BenchmarkConfig(
-            graph_path=Path("/test/graph.json"),
-            warmup_iters=10,
-            benchmark_iters=100,
-        )
-        ab_config = ABTestConfig(
-            a_path=Path("/path/to/pluginA"),
-            a_id=1,
-            b_path=Path("/path/to/pluginB"),
-            b_id=2,
+        graph = GraphResult(
+            graph_name="g",
+            graph_path="/tmp/g.json",
+            results=[
+                ProviderEngineResult(
+                    provider="engine_1",
+                    engine_id=1,
+                    status="success",
+                    gpu_kernel_stats=BenchmarkStats(
+                        mean_ms=1.0,
+                        median_ms=0.9,
+                        std_ms=0.1,
+                        min_ms=0.8,
+                        max_ms=1.2,
+                        p95_ms=1.1,
+                        p99_ms=1.2,
+                    ),
+                    e2e_stats=BenchmarkStats(
+                        mean_ms=2.0,
+                        median_ms=1.8,
+                        std_ms=0.2,
+                        min_ms=1.6,
+                        max_ms=2.4,
+                        p95_ms=2.2,
+                        p99_ms=2.4,
+                    ),
+                )
+            ],
         )
 
-        reporter.print_ab_header(config, ab_config, "test_conv_fwd")
+        reporter.print_graph_result_table(graph)
 
         result = output.getvalue()
-        assert "hipDNN A/B Test: test_conv_fwd" in result
-        assert "/test/graph.json" in result
-        assert "Configuration A:" in result
-        assert "Configuration B:" in result
-        assert "/path/to/pluginA" in result
-        assert "/path/to/pluginB" in result
-        assert "Engine ID:   1" in result
-        assert "Engine ID:   2" in result
-
-    def test_print_ab_header_default_paths(self) -> None:
-        """Test A/B header with default plugin paths."""
-        output = io.StringIO()
-        reporter = Reporter(output=output)
+        assert "kernel_mean_ms" in result
+        assert "kernel_median_ms" in result
+        assert "e2e_mean_ms" in result
+        assert "e2e_median_ms" in result
+        assert "delta_pct" not in result
 
-        config = BenchmarkConfig(
-            graph_path=Path("/test/graph.json"),
-        )
-        ab_config = ABTestConfig(a_id=1, b_id=2)
-
-        reporter.print_ab_header(config, ab_config, "test_conv_fwd")
-
-        result = output.getvalue()
-        assert "(default)" in result
-
-    def test_print_ab_stats(self) -> None:
-        """Test A/B statistics output format."""
+    def test_print_graph_result_table_with_plugin_path_column(self) -> None:
         output = io.StringIO()
         reporter = Reporter(output=output)
-
-        stats_a = BenchmarkStats(
-            mean_ms=1.234,
-            std_ms=0.045,
-            min_ms=1.156,
-            max_ms=1.456,
-            p95_ms=1.312,
-            p99_ms=1.398,
-        )
-        stats_b = BenchmarkStats(
-            mean_ms=1.100,
-            std_ms=0.035,
-            min_ms=1.050,
-            max_ms=1.200,
-            p95_ms=1.180,
-            p99_ms=1.195,
+        graph = GraphResult(
+            graph_name="g",
+            graph_path="/tmp/g.json",
+            results=[
+                ProviderEngineResult(
+                    provider="engine_2",
+                    engine_id=2,
+                    status="success",
+                    plugin_path="/plugins/b",
+                    gpu_kernel_stats=BenchmarkStats(
+                        mean_ms=1.0,
+                        median_ms=0.9,
+                        std_ms=0.1,
+                        min_ms=0.8,
+                        max_ms=1.2,
+                        p95_ms=1.1,
+                        p99_ms=1.2,
+                    ),
+                    e2e_stats=BenchmarkStats(
+                        mean_ms=2.0,
+                        median_ms=1.8,
+                        std_ms=0.2,
+                        min_ms=1.6,
+                        max_ms=2.4,
+                        p95_ms=2.2,
+                        p99_ms=2.4,
+                    ),
+                )
+            ],
         )
 
-        reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0)
+        reporter.print_graph_result_table(graph)
 
         result = output.getvalue()
-        assert "A" in result
-        assert "B" in result
-        assert "Init Time:" in result
-        assert "Mean:" in result
-        assert "Speedup:" in result
-
-    def test_print_ab_stats_speedup_b_faster(self) -> None:
-        """Test A/B stats shows B is faster."""
-        output = io.StringIO()
-        reporter = Reporter(output=output)
-
-        stats_a = BenchmarkStats(
-            mean_ms=2.0,
-            std_ms=0.1,
-            min_ms=1.9,
-            max_ms=2.1,
-            p95_ms=2.0,
-            p99_ms=2.1,
-        )
-        stats_b = BenchmarkStats(
-            mean_ms=1.0,
-            std_ms=0.1,
-            min_ms=0.9,
-            max_ms=1.1,
-            p95_ms=1.0,
-            p99_ms=1.1,
-        )
-
-        reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0)
-
-        result = output.getvalue()
-        assert "B is" in result
-        assert "faster" in result
-
-    def test_print_ab_stats_speedup_a_faster(self) -> None:
-        """Test A/B stats shows A is faster."""
-        output = io.StringIO()
-        reporter = Reporter(output=output)
-
-        stats_a = BenchmarkStats(
-            mean_ms=1.0,
-            std_ms=0.1,
-            min_ms=0.9,
-            max_ms=1.1,
-            p95_ms=1.0,
-            p99_ms=1.1,
-        )
-        stats_b = BenchmarkStats(
-            mean_ms=2.0,
-            std_ms=0.1,
-            min_ms=1.9,
-            max_ms=2.1,
-            p95_ms=2.0,
-            p99_ms=2.1,
-        )
-
-        reporter.print_ab_stats(stats_a, stats_b, 45.0, 42.0)
-
-        result = output.getvalue()
-        assert "A is" in result
-        assert "faster" in result
-
-    def test_print_ab_comparison_passed(self) -> None:
-        """Test A/B comparison passed output."""
-        output = io.StringIO()
-        reporter = Reporter(output=output)
-
-        reporter.print_ab_comparison(True, 1e-7, 1e-6, 1e-5, 1e-8)
-
-        result = output.getvalue()
-        assert "PASSED" in result
-        assert "rtol=" in result
-        assert "atol=" in result
-
-    def test_print_ab_comparison_failed(self) -> None:
-        """Test A/B comparison failed output."""
-        output = io.StringIO()
-        reporter = Reporter(output=output)
-
-        reporter.print_ab_comparison(False, 0.1, 0.05, 1e-5, 1e-8)
-
-        result = output.getvalue()
-        assert "FAILED" in result
-        assert "Max abs diff:" in result
-        assert "Max rel diff:" in result
+        assert "plugin_path" in result
+        assert "/plugins/b" in result
+        assert "delta_pct" not in result
+        assert "%" not in result
diff --git a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py
index 0ec157ead2e9..d077ab6fe982 100644
--- a/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py
+++ b/projects/hipdnn/tools/dnn-benchmarking/tests/unit/reporting/test_suite_results.py
@@ -34,6 +34,7 @@ def test_to_dict(self):
         d = stats.to_dict()
         assert d == {
             "mean_ms": 1.0,
+            "median_ms": 0.0,
             "std_ms": 0.1,
             "min_ms": 0.5,
             "max_ms": 1.5,
@@ -148,6 +149,31 @@ def test_success_serializes_with_timing_and_correctness(self):
         assert "correctness" in d
         assert d["gpu_kernel_stats"]["mean_ms"] == 1.0
 
+    def test_success_serializes_plugin_path(self):
+        stats = BenchmarkStats(
+            mean_ms=1.0,
+            std_ms=0.1,
+            min_ms=0.5,
+            max_ms=1.5,
+            p95_ms=1.4,
+            p99_ms=1.49,
+            median_ms=0.9,
+        )
+        pe = ProviderEngineResult(
+            provider="miopen",
+            engine_id=1,
+            status="success",
+            plugin_path="/plugins/a",
+            cpu_build_time_ms=10.5,
+            gpu_kernel_stats=stats,
+            e2e_stats=stats,
+        )
+
+        d = pe.to_dict()
+
+        assert d["plugin_path"] == "/plugins/a"
+        assert "comparison_to_baseline" not in d
+
     def test_error_serializes_without_timing(self):
         """ProviderEngineResult with status='error' serializes with
         status, error_message, no timing data."""