feat: add BenchmarkLogger (NO TESTS)

iraedeus · iraedeus · commit f13336b5192c · 2026-04-14T17:34:09.000+03:00
diff --git a/examples/noreset_shewhart.py b/examples/noreset_shewhart.py
@@ -141,7 +141,7 @@ def main() -> None:
     WINDOW_SIZE = 50
 
     # Thresholds to evaluate
-    THRESHOLDS = np.linspace(0, 7, 30)
+    THRESHOLDS = np.linspace(0, 7, 3000)
 
     # Error margin for TP/FP/FN matching & Delays
     ERROR_MARGIN = (0, 100)
@@ -168,7 +168,8 @@ def main() -> None:
 
     print(f"Algorithm: ShewhartControlChart(learning_period={LEARNING_PERIOD}, window={WINDOW_SIZE})")
     print(
-        f"Dataset (NoReset): {N_SERIES} series, length={SERIES_LENGTH}, change_point={CHANGE_POINT}, shift={MU_AFTER - MU_BEFORE:.1f}σ"
+        f"Dataset (NoReset): {N_SERIES} series, length={SERIES_LENGTH}, change_point={CHANGE_POINT}, "
+        f"shift={MU_AFTER - MU_BEFORE:.1f}*sigma"
     )
     print(f"Dataset (ARL):     {N_SERIES} series, length={SERIES_LENGTH}, no change points")
     print(f"Error margin: {ERROR_MARGIN}")
@@ -197,6 +198,7 @@ def main() -> None:
         solver=solver,
         policy=policy,
         dump_dir="benchmark_cache/noreset",
+        verbose=True,
     )
     noreset_results = runner.run()
 
@@ -209,6 +211,7 @@ def main() -> None:
         solver=solver,
         mode="noreset",  # uses rapid point-based extraction behind the scenes
         dump_dir="benchmark_cache/arl",
+        verbose=True,
     )
     arl_results = arl_runner.run()
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ scikit-learn = ">=1.5.2"
 plotly = "^6.6.0"
 jupyter = "^1.1.1"
 pandas = "^3.0.2"
+tqdm = "^4.67.3"
 
 [tool.poetry.group.dev.dependencies]
 pytest = ">=8.2.2"
diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -71,6 +71,7 @@ def __init__(
         solver: OnlineCpdSolver,
         mode: Literal["reset", "noreset"],
         dump_dir: Path | str | None = None,
+        verbose: bool = False,
     ) -> None:
         for provider in providers:
             if provider.change_points:
@@ -87,6 +88,7 @@ def __init__(
             metrics=metrics,  # type: ignore[arg-type]
             solver=solver,
             dump_dir=dump_dir,
+            verbose=verbose,
         )
 
         self._mode = mode
diff --git a/pysatl_cpd/benchmark/core/benchmark_logger.py b/pysatl_cpd/benchmark/core/benchmark_logger.py
@@ -0,0 +1,145 @@
+# -*- coding: ascii -*-
+"""
+Logging utilities for benchmark execution.
+"""
+
+import logging
+from typing import Any
+
+__author__ = "PySATL contributors"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+
+class BenchmarkLogger:
+    """Dedicated logger for benchmark execution with structured logging.
+
+    Keyword arguments given to the level methods are appended to the message
+    as `` | key=value`` pairs, in the order they were passed.
+    """
+
+    def __init__(self, name: str = "pysatl.benchmark"):
+        """Bind to the ``logging`` logger called ``name`` and configure it if needed."""
+        self.logger = logging.getLogger(name)
+        self._setup_logger()
+
+    def _setup_logger(self) -> None:
+        """Attach a stream handler and set INFO level unless handlers already exist."""
+        if not self.logger.handlers:
+            handler = logging.StreamHandler()
+            formatter = logging.Formatter(
+                "[%(asctime)s] %(levelname)-8s | %(message)s",
+                datefmt="%H:%M:%S",
+            )
+            handler.setFormatter(formatter)
+            self.logger.addHandler(handler)
+            self.logger.setLevel(logging.INFO)
+
+    def _log(self, level: int, msg: str, context: dict[str, Any]) -> None:
+        """Emit ``msg`` at ``level`` with ``context`` appended as ``key=value`` pairs.
+
+        The level is checked first so that records which would be discarded
+        (e.g. DEBUG while the logger is at INFO) skip the string building.
+        """
+        if not self.logger.isEnabledFor(level):
+            return
+        if context:
+            msg = f"{msg} | {' | '.join(f'{k}={v}' for k, v in context.items())}"
+        self.logger.log(level, msg)
+
+    def info(self, msg: str, **kwargs: Any) -> None:
+        """Log ``msg`` at INFO level with optional ``kwargs`` context."""
+        self._log(logging.INFO, msg, kwargs)
+
+    def debug(self, msg: str, **kwargs: Any) -> None:
+        """Log ``msg`` at DEBUG level with optional ``kwargs`` context."""
+        self._log(logging.DEBUG, msg, kwargs)
+
+    def warning(self, msg: str, **kwargs: Any) -> None:
+        """Log ``msg`` at WARNING level with optional ``kwargs`` context."""
+        self._log(logging.WARNING, msg, kwargs)
+
+    def error(self, msg: str, **kwargs: Any) -> None:
+        """Log ``msg`` at ERROR level with optional ``kwargs`` context."""
+        self._log(logging.ERROR, msg, kwargs)
+
+    def start_benchmark(
+        self,
+        n_algorithms: int,
+        n_providers: int,
+        n_total_runs: int,
+    ) -> None:
+        """Log benchmark start with the algorithm, provider and total-run counts."""
+        self.info(
+            "Starting benchmark execution",
+            algorithms=n_algorithms,
+            providers=n_providers,
+            total_runs=n_total_runs,
+        )
+
+    def algorithm_start(self, algo_name: str, n_thresholds: int) -> None:
+        """Log that processing of ``algo_name`` begins, with its threshold count."""
+        self.info(
+            f"Processing algorithm: {algo_name}",
+            thresholds=n_thresholds,
+        )
+
+    def threshold_processed(self, algo_name: str, threshold: float, n_providers: int) -> None:
+        """Log at DEBUG level that one threshold was processed."""
+        self.debug(
+            "Threshold processed",
+            algo=algo_name,
+            threshold=f"{threshold:.4f}",
+            providers=n_providers,
+        )
+
+    def cache_hit(self, algo_name: str, threshold: float, provider: str) -> None:
+        """Log a cache hit at DEBUG level."""
+        self.debug(
+            "Cache hit",
+            algo=algo_name,
+            threshold=f"{threshold:.4f}",
+            provider=provider,
+        )
+
+    def solver_start(self, algo_name: str, provider: str, threshold: float) -> None:
+        """Log the start of a solver execution at DEBUG level."""
+        self.debug(
+            "Executing solver",
+            algo=algo_name,
+            provider=provider,
+            threshold=f"{threshold:.4f}",
+        )
+
+    def metrics_computed(self, algo_name: str, threshold: float, metric_names: list[str]) -> None:
+        """Log the comma-joined ``metric_names`` at DEBUG level."""
+        self.debug(
+            "Metrics computed",
+            algo=algo_name,
+            threshold=f"{threshold:.4f}",
+            metrics=", ".join(metric_names),
+        )
+
+    def benchmark_complete(self, total_runs: int, elapsed_sec: float) -> None:
+        """Log benchmark completion with total and per-run elapsed time."""
+        # Guard the division: a run with no thresholds reports 0 as the average.
+        avg_time = elapsed_sec / total_runs if total_runs > 0 else 0.0
+        self.info(
+            "Benchmark completed",
+            total_runs=total_runs,
+            elapsed_time=f"{elapsed_sec:.2f}s",
+            avg_time_per_run=f"{avg_time:.3f}s",
+        )
+
+    def warning_no_metrics(self) -> None:
+        """Log warning about missing metrics."""
+        self.warning("No metrics registered for evaluation")
+
+    def error_exception(self, algo_name: str, threshold: float, error: str) -> None:
+        """Log an error message describing a failure for ``algo_name`` at ``threshold``."""
+        self.error(
+            "Error during execution",
+            algo=algo_name,
+            threshold=f"{threshold:.4f}",
+            error=error,
+        )
diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
@@ -64,13 +64,15 @@ def __init__(
         solver: OnlineCpdSolver,
         policy: ThresholdPolicy,
         dump_dir: Path | str | None = None,
+        verbose: bool = False,
     ) -> None:
         super().__init__(
             algorithms=algorithms,
             providers=providers,
             metrics=metrics,
             solver=solver,
             dump_dir=dump_dir,
+            verbose=verbose,
         )
         self._policy = policy
 
diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py
@@ -8,12 +8,16 @@
 __copyright__ = "Copyright (c) 2026 PySATL project"
 __license__ = "SPDX-License-Identifier: MIT"
 
+import time
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
+from tqdm.auto import tqdm
+
 from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.core.benchmark_logger import BenchmarkLogger
 from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
 from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration
 from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
@@ -50,12 +54,15 @@ def __init__(
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
         dump_dir: Path | str | None = None,
+        verbose: bool = False,
     ) -> None:
         self._algorithms = algorithms
         self._providers = providers
         self._metrics = metrics
         self._solver = solver
         self._dump_dir = Path(dump_dir) if isinstance(dump_dir, str) else dump_dir
+        self._verbose = verbose
+        self._logger = BenchmarkLogger()
 
     @abstractmethod
     def _collect_runs(
@@ -100,23 +107,94 @@ def run(
             (threshold, {metric_name: metric_value}) entries, one per threshold.
         """
 
+        benchmark_start = time.time()
+
+        total_runs = sum(len(thresholds) for _, thresholds in self._algorithms)
+        n_algorithms = len(self._algorithms)
+        n_providers = len(self._providers)
+
+        if not self._metrics:
+            self._logger.warning_no_metrics()
+
+        self._logger.start_benchmark(
+            n_algorithms=n_algorithms,
+            n_providers=n_providers,
+            n_total_runs=total_runs,
+        )
+
         results: dict[
             tuple[str, OnlineAlgorithmConfiguration],
             list[tuple[float, dict[str, Any]]],
         ] = {}
 
-        for algorithm, thresholds in self._algorithms:
+        algo_iterator = tqdm(
+            self._algorithms,
+            disable=not self._verbose,
+            desc="Processing algorithms",
+            unit="algo",
+        )
+
+        for algorithm, thresholds in algo_iterator:
+            algo_name = str(algorithm)
+
+            self._logger.algorithm_start(algo_name, len(thresholds))
+
             key: tuple[str, OnlineAlgorithmConfiguration] = (
                 str(algorithm),
                 algorithm.configuration,
             )
             results[key] = []
 
-            for threshold in thresholds:
-                runs = self._collect_runs(algorithm, threshold, self._providers)
-
-                metric_values: dict[str, Any] = {name: metric.evaluate(runs) for name, metric in self._metrics.items()}
+            threshold_iterator = tqdm(
+                thresholds,
+                desc=f"  Thresholds ({algo_name})",
+                disable=not self._verbose,
+                leave=False,
+                unit="threshold",
+            )
 
-                results[key].append((threshold, metric_values))
+            for threshold in threshold_iterator:
+                try:
+                    self._logger.debug(
+                        "Collecting runs",
+                        algo=algo_name,
+                        threshold=f"{threshold:.4f}",
+                    )
+
+                    runs = self._collect_runs(algorithm, threshold, self._providers)
+
+                    metric_values: dict[str, Any] = {
+                        name: metric.evaluate(runs) for name, metric in self._metrics.items()
+                    }
+                    # Report metrics only after evaluate() has returned for every metric.
+                    self._logger.metrics_computed(
+                        algo_name=algo_name,
+                        threshold=threshold,
+                        metric_names=list(self._metrics.keys()),
+                    )
+
+                    results[key].append((threshold, metric_values))
+
+                    self._logger.threshold_processed(
+                        algo_name=algo_name,
+                        threshold=threshold,
+                        n_providers=n_providers,
+                    )
+
+                except Exception as e:
+                    self._logger.error_exception(
+                        algo_name=algo_name,
+                        threshold=threshold,
+                        error=str(e),
+                    )
+                    raise
+
+        benchmark_end = time.time()
+        elapsed = benchmark_end - benchmark_start
+
+        self._logger.benchmark_complete(
+            total_runs=total_runs,
+            elapsed_sec=elapsed,
+        )
 
         return results
diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py
@@ -58,13 +58,15 @@ def __init__(
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
         dump_dir: Path | str | None = None,
+        verbose: bool = False,
     ) -> None:
         super().__init__(
             algorithms=algorithms,
             providers=providers,
             metrics=metrics,
             solver=solver,
             dump_dir=dump_dir,
+            verbose=verbose,
         )
 
     def _collect_runs(