feat: add abstract OnlineBenchmarkRunner

iraedeus · iraedeus · commit 24d35beab6fd · 2026-04-13T23:07:15.000+03:00
diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -13,7 +14,7 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa
 ):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
         providers: list[ProviderT],
         solver: OnlineCpdSolver,
         dump_dir: Path | None = None,
@@ -24,6 +25,6 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py
@@ -79,11 +79,11 @@ class BenchmarkExecutor[DataT]:
 
     Parameters
     ----------
-    algorithms : list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
-        A list of tuples, where each tuple contains an instantiated online
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        A sequence of tuples, where each tuple contains an instantiated online
         algorithm and a sequence of thresholds to test it against.
-    providers : list[DataProvider[DataT]]
-        A list of data providers to be fed into the algorithms.
+    providers : Sequence[DataProvider[DataT]]
+        A sequence of data providers to be fed into the algorithms.
     solver : OnlineCpdSolver
         The solver instance responsible for iterating over the data providers
         and running the algorithmic logic.
@@ -94,8 +94,8 @@ class BenchmarkExecutor[DataT]:
 
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
-        providers: list[DataProvider[DataT]],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[DataProvider[DataT]],
         solver: OnlineCpdSolver,
         dump_dir: str | Path | None = None,
     ) -> None:
diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -14,8 +15,8 @@
 class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]],
         solver: OnlineCpdSolver,
         policy: ThresholdPolicy,
@@ -27,7 +28,7 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]:
         raise NotImplementedError("Method '_collect_runs' is not implemented yet.")
 
diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py
@@ -1,5 +1,15 @@
-# online_runner.py
+# -*- coding: ascii -*-
+
+"""
+Abstract base class for online benchmark runners.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from abc import ABC, abstractmethod
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -11,26 +21,102 @@
 
 
 class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](ABC):
+    """
+    Abstract base class for online benchmark runners.
+
+    Organises the evaluation loop over algorithms and thresholds,
+    delegates data collection to subclasses via _collect_runs(), and
+    applies all registered metrics to each batch of runs.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs to evaluate.
+    providers : Sequence[ProviderT]
+        Sequence of labeled data providers.
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        Named metrics to evaluate for each (algorithm, threshold) batch.
+    solver : OnlineCpdSolver
+        Solver used to run algorithms against providers.
+    dump_dir : Path | str | None, optional
+        Directory for caching results via BenchmarkExecutor.
+        If None, caching is disabled. Default is None.
+    """
+
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
-        dump_dir: Path | None = None,
+        dump_dir: Path | str | None = None,
     ) -> None:
-        return
+        self._algorithms = algorithms
+        self._providers = providers
+        self._metrics = metrics
+        self._solver = solver
+        self._dump_dir = Path(dump_dir) if isinstance(dump_dir, str) else dump_dir
 
     @abstractmethod
     def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
+        """
+        Collect (trace, provider) pairs for a given algorithm and threshold.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm to evaluate.
+        threshold : float
+            The detection threshold.
+        providers : Sequence[ProviderT]
+            Sequence of data providers to run against.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            Batch of (trace, provider) pairs for metric evaluation.
+        """
+
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
 
     def run(
         self,
     ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]:
-        raise NotImplementedError("Method `run` is not implemented yet.")
+        """
+        Execute the benchmark over all algorithms and thresholds.
+
+        For each (algorithm, threshold) pair, collects runs via
+        _collect_runs() and evaluates all registered metrics.
+
+        Returns
+        -------
+        dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]
+            Mapping of (str(algorithm), algorithm.configuration) to a list of
+            (threshold, {metric_name: metric_value}) entries, one per threshold.
+        """
+
+        results: dict[
+            tuple[str, OnlineAlgorithmConfiguration],
+            list[tuple[float, dict[str, Any]]],
+        ] = {}
+
+        for algorithm, thresholds in self._algorithms:
+            key: tuple[str, OnlineAlgorithmConfiguration] = (
+                str(algorithm),
+                algorithm.configuration,
+            )
+            results[key] = []
+
+            for threshold in thresholds:
+                runs = self._collect_runs(algorithm, threshold, self._providers)
+
+                metric_values: dict[str, Any] = {name: metric.evaluate(runs) for name, metric in self._metrics.items()}
+
+                results[key].append((threshold, metric_values))
+
+        return results
diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -14,8 +15,8 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labeled
 ):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
         dump_dir: Path | None = None,
@@ -26,6 +27,6 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
diff --git a/tests/mocks/benchmark/mock_benchmark_runner.py b/tests/mocks/benchmark/mock_benchmark_runner.py
@@ -0,0 +1,92 @@
+# -*- coding: ascii -*-
+
+"""
+Mock OnlineBenchmarkRunner for testing.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class MockBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
+    OnlineBenchmarkRunner[TraceT, ProviderT]
+):
+    """
+    Mock implementation of OnlineBenchmarkRunner for testing.
+
+    Records all _collect_runs calls for assertion in tests.
+    Returns a pre-configured list of runs for each call.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs.
+    providers : Sequence[ProviderT]
+        Sequence of data providers.
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        Dictionary of metrics to evaluate.
+    solver : OnlineCpdSolver
+        Solver instance.
+    dump_dir : Path | str | None, optional
+        Directory for caching results.
+    runs_to_return : list[tuple[TraceT, ProviderT]] | None, optional
+        Pre-configured runs returned by _collect_runs.
+        If None (or empty), _collect_runs returns an empty list.
+    """
+
+    def __init__(
+        self,
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
+        metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
+        solver: OnlineCpdSolver,
+        dump_dir: Path | str | None = None,
+        runs_to_return: list[tuple[TraceT, ProviderT]] | None = None,
+    ) -> None:
+        super().__init__(
+            algorithms=algorithms,
+            providers=providers,
+            metrics=metrics,
+            solver=solver,
+            dump_dir=dump_dir,
+        )
+        self._runs_to_return: list[tuple[TraceT, ProviderT]] = runs_to_return or []
+        self.collect_runs_calls: list[tuple[OnlineAlgorithm[Any, Any, Any], float, Sequence[ProviderT]]] = []
+
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: Sequence[ProviderT],
+    ) -> list[tuple[TraceT, ProviderT]]:
+        """
+        Record the call and return pre-configured runs.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm being evaluated.
+        threshold : float
+            The detection threshold.
+        providers : Sequence[ProviderT]
+            Sequence of data providers.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            Pre-configured runs set at construction time.
+        """
+        self.collect_runs_calls.append((algorithm, threshold, providers))
+        return self._runs_to_return
diff --git a/tests/unit/benchmark/test_online_benchmark_runner.py b/tests/unit/benchmark/test_online_benchmark_runner.py