From 99ac199847b44b11ffc737e2c84655ce9d5c15d0 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 7 Apr 2026 05:04:35 +0300
Subject: [PATCH 01/15] feat: add benchmark executor

---
 pysatl_cpd/benchmark/benchmark_executor.py | 116 +++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 pysatl_cpd/benchmark/benchmark_executor.py

diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py
new file mode 100644
index 0000000..924bdc0
--- /dev/null
+++ b/pysatl_cpd/benchmark/benchmark_executor.py
@@ -0,0 +1,116 @@
+import csv
+import math
+import pickle
+from collections.abc import Sequence
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.core.data_providers.idata_provider import DataProvider
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+@dataclass
+class BenchmarkRecord:
+    algorithm: str
+    configuration_hash: str
+    data: str
+    threshold: float
+    trace_path: str | None = None
+
+    @property
+    def key(self) -> tuple[str, str, str, float]:
+        return (self.algorithm, self.configuration_hash, self.data, self.threshold)
+
+
+class BenchmarkExecutor[DataT]:
+    def __init__(
+        self,
+        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: list[DataProvider[DataT]],
+        solver: OnlineCpdSolver,
+        dump_dir: str | Path | None = None,
+    ) -> None:
+        self.__algorithms = algorithms
+        self.__providers = providers
+        self.__solver = solver
+        self.__dump_dir = Path(dump_dir) if dump_dir is not None else None
+
+    def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
+        results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = []
+        registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {}
+        registry_path: Path | None = None
+
+        if self.__dump_dir is not None:
+            self.__dump_dir.mkdir(parents=True, exist_ok=True)
+            registry_path = self.__dump_dir / "benchmark_registry.csv"
+
+            if registry_path.exists():
+                with open(registry_path, encoding="utf-8") as f:
+                    reader = csv.DictReader(f)
+                    for row in reader:
+                        record = BenchmarkRecord(
+                            algorithm=row["algorithm"],
+                            configuration_hash=row["configuration_hash"],
+                            data=row["data"],
+                            threshold=float(row["threshold"]),
+                            trace_path=row["trace_path"] if row["trace_path"] else None,
+                        )
+                        registry[record.key] = record
+
+        for algorithm, thresholds in self.__algorithms:
+            algo_name = str(algorithm)
+            config_hash = str(hash(algo_name))
+
+            for provider in self.__providers:
+                data_name = provider.name
+
+                for threshold in thresholds:
+                    key = (algo_name, config_hash, data_name, float(threshold))
+
+                    if key in registry and registry[key].trace_path:
+                        trace_file = Path(registry[key].trace_path)  # type: ignore
+                        if trace_file.exists():
+                            with open(trace_file, "rb") as f:
+                                trace = pickle.load(f)
+                            results.append((registry[key], trace))
+                            continue
+
+                    steps = list(self.__solver.run(algorithm, provider, threshold))
+                    trace = OnlineDetectionTrace.from_run(steps)
+
+                    record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None)
+
+                    if self.__dump_dir is not None:
+                        safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name)
+                        thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_")
+                        filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl"
+
+                        trace_path = self.__dump_dir / filename
+                        with open(trace_path, "wb") as f:
+                            pickle.dump(trace, f)
+
+                        record.trace_path = str(trace_path)
+                        registry[key] = record
+
+                    results.append((record, trace))
+
+            if registry_path is not None:
+                fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"]
+                with open(registry_path, mode="w", encoding="utf-8", newline="") as f:
+                    writer = csv.DictWriter(f, fieldnames=fieldnames)
+                    writer.writeheader()
+                    for rec in registry.values():
+                        writer.writerow(
+                            {
+                                "algorithm": rec.algorithm,
+                                "configuration_hash": rec.configuration_hash,
+                                "data": rec.data,
+                                "threshold": rec.threshold,
+                                "trace_path": rec.trace_path or "",
+                            }
+                        )
+
+        return results

From 74c086fb7f32e10834da05dfac56265e09395a4f Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 7 Apr 2026 05:13:01 +0300
Subject: [PATCH 02/15] docs(benchmark): benchmark executor

---
 pysatl_cpd/benchmark/benchmark_executor.py | 126 +++++++++++++++++----
 1 file changed, 104 insertions(+), 22 deletions(-)

diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py
index 924bdc0..a951a01 100644
--- a/pysatl_cpd/benchmark/benchmark_executor.py
+++ b/pysatl_cpd/benchmark/benchmark_executor.py
@@ -1,4 +1,19 @@
+# -*- coding: ascii -*-
+"""
+Benchmark execution module for change-point detection algorithms.
+
+This module provides the core components for running and caching performance
+evaluations of online CPD algorithms across multiple datasets and threshold
+configurations.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 import csv
+import hashlib
+import itertools
 import math
 import pickle
 from collections.abc import Sequence
@@ -14,6 +29,27 @@
 
 @dataclass
 class BenchmarkRecord:
+    """
+    Metadata container for a single benchmark execution.
+
+    This record uniquely identifies a benchmark run and stores the path
+    to the cached trace file if disk dumping is enabled.
+
+    Parameters
+    ----------
+    algorithm : str
+        The string identifier or name of the online algorithm.
+    configuration_hash : str
+        A hash string representing the algorithm's configuration.
+    data : str
+        The identifier or name of the dataset.
+    threshold : float
+        The detection threshold used for this specific run.
+    trace_path : str | None, default=None
+        Absolute or relative path to the serialized detection trace file,
+        if caching is enabled.
+    """
+
     algorithm: str
     configuration_hash: str
     data: str
@@ -22,10 +58,41 @@ class BenchmarkRecord:
 
     @property
     def key(self) -> tuple[str, str, str, float]:
+        """
+        Get the unique composite key for this benchmark run.
+
+        Returns
+        -------
+        tuple[str, str, str, float]
+            A tuple containing (algorithm, configuration_hash, data, threshold)
+            used for identifying the record in the registry.
+        """
         return (self.algorithm, self.configuration_hash, self.data, self.threshold)
 
 
 class BenchmarkExecutor[DataT]:
+    """
+    Orchestrator for executing change-point detection benchmarks.
+
+    Evaluates a set of algorithms across multiple data providers and thresholds
+    using a provided online solver. Supports a caching mechanism via disk dumping
+    to prevent redundant calculations on subsequent runs.
+
+    Parameters
+    ----------
+    algorithms : list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        A list of tuples, where each tuple contains an instantiated online
+        algorithm and a sequence of thresholds to test it against.
+    providers : list[DataProvider[DataT]]
+        A list of data providers to be fed into the algorithms.
+    solver : OnlineCpdSolver
+        The solver instance responsible for iterating over the data providers
+        and running the algorithmic logic.
+    dump_dir : str | Path | None, optional
+        Directory path where the benchmark registry (CSV) and serialized traces
+        (Pickle files) should be stored. If None, caching is disabled.
+    """
+
     def __init__(
         self,
         algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
@@ -39,6 +106,21 @@ def __init__(
         self.__dump_dir = Path(dump_dir) if dump_dir is not None else None
 
     def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
+        """
+        Execute the benchmark over all combinations of algorithms, data, and thresholds.
+
+        Iterates through the combinations of algorithms, datasets, and thresholds.
+        If disk caching (`dump_dir`) is enabled, it attempts to load previously
+        calculated traces from the registry to bypass solver execution. If a trace
+        is missing, it runs the solver, caches the resulting trace to disk, and
+        updates the CSV registry.
+
+        Returns
+        -------
+        list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]
+            A list of execution results, where each element is a pair containing
+            the benchmark metadata record and the corresponding detection trace.
+        """
         results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = []
         registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {}
         registry_path: Path | None = None
@@ -60,42 +142,42 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
                         )
                         registry[record.key] = record
 
-        for algorithm, thresholds in self.__algorithms:
+        for (algorithm, thresholds), provider in itertools.product(self.__algorithms, self.__providers):
             algo_name = str(algorithm)
-            config_hash = str(hash(algo_name))
-
-            for provider in self.__providers:
-                data_name = provider.name
+            config_hash = str(hashlib.md5(algo_name.encode("utf-8")).hexdigest()[:8])
+            data_name = provider.name
 
-                for threshold in thresholds:
-                    key = (algo_name, config_hash, data_name, float(threshold))
+            for threshold in thresholds:
+                key = (algo_name, config_hash, data_name, float(threshold))
 
-                    if key in registry and registry[key].trace_path:
-                        trace_file = Path(registry[key].trace_path)  # type: ignore
+                if key in registry:
+                    cached_path = registry[key].trace_path
+                    if cached_path is not None:
+                        trace_file = Path(cached_path)
                         if trace_file.exists():
                             with open(trace_file, "rb") as f:
                                 trace = pickle.load(f)
                             results.append((registry[key], trace))
                             continue
 
-                    steps = list(self.__solver.run(algorithm, provider, threshold))
-                    trace = OnlineDetectionTrace.from_run(steps)
+                steps = list(self.__solver.run(algorithm, provider, threshold))
+                trace = OnlineDetectionTrace.from_run(steps)
 
-                    record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None)
+                record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None)
 
-                    if self.__dump_dir is not None:
-                        safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name)
-                        thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_")
-                        filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl"
+                if self.__dump_dir is not None:
+                    safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name)
+                    thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_")
+                    filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl"
 
-                        trace_path = self.__dump_dir / filename
-                        with open(trace_path, "wb") as f:
-                            pickle.dump(trace, f)
+                    trace_path = self.__dump_dir / filename
+                    with open(trace_path, "wb") as f:
+                        pickle.dump(trace, f)
 
-                        record.trace_path = str(trace_path)
-                        registry[key] = record
+                    record.trace_path = str(trace_path)
+                    registry[key] = record
 
-                    results.append((record, trace))
+                results.append((record, trace))
 
             if registry_path is not None:
                 fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"]

From 553bbfe65f62a11b0cd7e13299c658663d193d95 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Sat, 11 Apr 2026 02:52:42 +0300
Subject: [PATCH 03/15] refactor: BenchmarkRecord and configuration_hash
 calculation

---
 pysatl_cpd/benchmark/benchmark_executor.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py
index a951a01..dec6416 100644
--- a/pysatl_cpd/benchmark/benchmark_executor.py
+++ b/pysatl_cpd/benchmark/benchmark_executor.py
@@ -12,7 +12,6 @@
 __license__ = "SPDX-License-Identifier: MIT"
 
 import csv
-import hashlib
 import itertools
 import math
 import pickle
@@ -51,19 +50,19 @@ class BenchmarkRecord:
     """
 
     algorithm: str
-    configuration_hash: str
+    configuration_hash: int
     data: str
     threshold: float
     trace_path: str | None = None
 
     @property
-    def key(self) -> tuple[str, str, str, float]:
+    def key(self) -> tuple[str, int, str, float]:
         """
         Get the unique composite key for this benchmark run.
 
         Returns
         -------
-        tuple[str, str, str, float]
+        tuple[str, int, str, float]
             A tuple containing (algorithm, configuration_hash, data, threshold)
             used for identifying the record in the registry.
         """
@@ -122,7 +121,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
             the benchmark metadata record and the corresponding detection trace.
         """
         results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = []
-        registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {}
+        registry: dict[tuple[str, int, str, float], BenchmarkRecord] = {}
         registry_path: Path | None = None
 
         if self.__dump_dir is not None:
@@ -135,7 +134,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
                     for row in reader:
                         record = BenchmarkRecord(
                             algorithm=row["algorithm"],
-                            configuration_hash=row["configuration_hash"],
+                            configuration_hash=int(row["configuration_hash"]),
                             data=row["data"],
                             threshold=float(row["threshold"]),
                             trace_path=row["trace_path"] if row["trace_path"] else None,
@@ -144,7 +143,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
 
         for (algorithm, thresholds), provider in itertools.product(self.__algorithms, self.__providers):
             algo_name = str(algorithm)
-            config_hash = str(hashlib.md5(algo_name.encode("utf-8")).hexdigest()[:8])
+            config_hash = hash(algorithm.configuration)
             data_name = provider.name
 
             for threshold in thresholds:
@@ -161,7 +160,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
                             continue
 
                 steps = list(self.__solver.run(algorithm, provider, threshold))
-                trace = OnlineDetectionTrace.from_run(steps)
+                trace = OnlineDetectionTrace.from_run(steps, algo_name, config_hash)
 
                 record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None)
 

From 58abc68642531c5349a414b58b6852cf0adc6fea Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Sun, 12 Apr 2026 18:12:36 +0300
Subject: [PATCH 04/15] refactor: move benchmark_executor to benchmark/core
 module

---
 pysatl_cpd/benchmark/{ => core}/benchmark_executor.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pysatl_cpd/benchmark/{ => core}/benchmark_executor.py (100%)

diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py
similarity index 100%
rename from pysatl_cpd/benchmark/benchmark_executor.py
rename to pysatl_cpd/benchmark/core/benchmark_executor.py

From dde042b35f63503318251600cdaa0e5d7859fdd1 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Mon, 13 Apr 2026 11:17:21 +0300
Subject: [PATCH 05/15] feat: add interfaces and signatures for benchmark
 module

---
 pysatl_cpd/benchmark/arl_benchmark_runner.py  | 29 ++++++++++++
 .../benchmark/core/benchmark_analyzer.py      | 20 +++++++++
 .../noreset/noreset_benchmark_runner.py       | 39 ++++++++++++++++
 .../noreset/noreset_detection_trace.py        | 13 ++++++
 .../benchmark/noreset/threshold_policy.py     | 45 +++++++++++++++++++
 .../benchmark/online_benchmark_runner.py      | 36 +++++++++++++++
 .../benchmark/reset_benchmark_runner.py       | 31 +++++++++++++
 7 files changed, 213 insertions(+)
 create mode 100644 pysatl_cpd/benchmark/arl_benchmark_runner.py
 create mode 100644 pysatl_cpd/benchmark/core/benchmark_analyzer.py
 create mode 100644 pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
 create mode 100644 pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
 create mode 100644 pysatl_cpd/benchmark/noreset/threshold_policy.py
 create mode 100644 pysatl_cpd/benchmark/online_benchmark_runner.py
 create mode 100644 pysatl_cpd/benchmark/reset_benchmark_runner.py

diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
new file mode 100644
index 0000000..8646aa4
--- /dev/null
+++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
+    OnlineBenchmarkRunner[TraceT, ProviderT]
+):
+    def __init__(
+        self,
+        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        providers: list[ProviderT],
+        solver: OnlineCpdSolver,
+        dump_dir: Path | None = None,
+    ) -> None:
+        return
+
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: list[ProviderT],
+    ) -> list[tuple[TraceT, ProviderT]]:
+        raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/core/benchmark_analyzer.py b/pysatl_cpd/benchmark/core/benchmark_analyzer.py
new file mode 100644
index 0000000..fb7a511
--- /dev/null
+++ b/pysatl_cpd/benchmark/core/benchmark_analyzer.py
@@ -0,0 +1,20 @@
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[OnlineAlgorithmState], ProviderT: LabeledData[Any]]:
+    def __init__(
+        self,
+        metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
+    ) -> None:
+        return
+
+    def analyze(
+        self,
+        runs: list[tuple[TraceT, ProviderT]],
+    ) -> dict[str, Any]:
+        raise NotImplementedError("Method `analyze` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
new file mode 100644
index 0000000..f1af0d0
--- /dev/null
+++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace
+from pysatl_cpd.benchmark.noreset.threshold_policy import ThresholdPolicy
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]):
+    def __init__(
+        self,
+        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        providers: list[ProviderT],
+        metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]],
+        solver: OnlineCpdSolver,
+        policy: ThresholdPolicy,
+        dump_dir: Path | None = None,
+    ) -> None:
+        return
+
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: list[ProviderT],
+    ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]:
+        raise NotImplementedError("Method '_collect_runs' is not implemented yet.")
+
+    def _get_inf_trace(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        provider: ProviderT,
+    ) -> OnlineDetectionTrace[Any]:
+        raise NotImplementedError("Method '_get_inf_trace' is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
new file mode 100644
index 0000000..3e34f98
--- /dev/null
+++ b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
@@ -0,0 +1,13 @@
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class NoResetDetectionTrace[StateT: OnlineAlgorithmState](OnlineDetectionTrace[StateT]):
+    @classmethod
+    def from_inf_trace(
+        cls,
+        source_trace: OnlineDetectionTrace[StateT],
+        detected_change_points: list[int],
+        threshold: float,
+    ) -> "NoResetDetectionTrace[StateT]":
+        raise NotImplementedError("Method 'from_inf_trace' is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/noreset/threshold_policy.py b/pysatl_cpd/benchmark/noreset/threshold_policy.py
new file mode 100644
index 0000000..5c83338
--- /dev/null
+++ b/pysatl_cpd/benchmark/noreset/threshold_policy.py
@@ -0,0 +1,45 @@
+from collections.abc import Sequence
+from typing import Protocol, runtime_checkable
+
+from pysatl_cpd.core.typedefs import UnivariateNumericArray
+
+
+@runtime_checkable
+class ThresholdPolicy(Protocol):
+    def apply(
+        self,
+        detection_function: UnivariateNumericArray,
+        threshold: float,
+        change_points: Sequence[int],  # true, 1-based
+    ) -> list[int]: ...  # 1-based signal indices
+
+
+class PointBasedPolicy:
+    def __init__(self, strict: bool = True) -> None:
+        return
+
+    def apply(
+        self,
+        detection_function: UnivariateNumericArray,
+        threshold: float,
+        change_points: Sequence[int],  # true, 1-based
+    ) -> list[int]:
+        raise NotImplementedError("Method `apply` is not implemented yet.")
+
+
+class EventBasedPolicy:
+    def __init__(
+        self,
+        max_delay: int,
+        strict_edge: bool = True,
+        strict_point: bool = True,
+    ) -> None:
+        return
+
+    def apply(
+        self,
+        detection_function: UnivariateNumericArray,
+        threshold: float,
+        change_points: Sequence[int],  # true, 1-based
+    ) -> list[int]:
+        raise NotImplementedError("Method `apply` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py
new file mode 100644
index 0000000..03391df
--- /dev/null
+++ b/pysatl_cpd/benchmark/online_benchmark_runner.py
@@ -0,0 +1,36 @@
+# online_runner.py
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](ABC):
+    def __init__(
+        self,
+        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        providers: list[ProviderT],
+        metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
+        solver: OnlineCpdSolver,
+        dump_dir: Path | None = None,
+    ) -> None:
+        return
+
+    @abstractmethod
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: list[ProviderT],
+    ) -> list[tuple[TraceT, ProviderT]]:
+        raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
+
+    def run(
+        self,
+    ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]:
+        raise NotImplementedError("Method `run` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py
new file mode 100644
index 0000000..960044b
--- /dev/null
+++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
+    OnlineBenchmarkRunner[TraceT, ProviderT]
+):
+    def __init__(
+        self,
+        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        providers: list[ProviderT],
+        metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
+        solver: OnlineCpdSolver,
+        dump_dir: Path | None = None,
+    ) -> None:
+        return
+
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: list[ProviderT],
+    ) -> list[tuple[TraceT, ProviderT]]:
+        raise NotImplementedError("Method `_collect_runs` is not implemented yet.")

From e982f55eb6ff3cc709972ee0f63d06237cd5e144 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Mon, 13 Apr 2026 14:17:42 +0300
Subject: [PATCH 06/15] fix: bug in benchmark executor with saving registry

---
 .../benchmark/core/benchmark_executor.py      | 30 +++++++++----------
 .../classification/classification_report.py   |  2 --
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py
index dec6416..15b6ddb 100644
--- a/pysatl_cpd/benchmark/core/benchmark_executor.py
+++ b/pysatl_cpd/benchmark/core/benchmark_executor.py
@@ -178,20 +178,20 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]:
 
                 results.append((record, trace))
 
-            if registry_path is not None:
-                fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"]
-                with open(registry_path, mode="w", encoding="utf-8", newline="") as f:
-                    writer = csv.DictWriter(f, fieldnames=fieldnames)
-                    writer.writeheader()
-                    for rec in registry.values():
-                        writer.writerow(
-                            {
-                                "algorithm": rec.algorithm,
-                                "configuration_hash": rec.configuration_hash,
-                                "data": rec.data,
-                                "threshold": rec.threshold,
-                                "trace_path": rec.trace_path or "",
-                            }
-                        )
+        if registry_path is not None:
+            fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"]
+            with open(registry_path, mode="w", encoding="utf-8", newline="") as f:
+                writer = csv.DictWriter(f, fieldnames=fieldnames)
+                writer.writeheader()
+                for rec in registry.values():
+                    writer.writerow(
+                        {
+                            "algorithm": rec.algorithm,
+                            "configuration_hash": rec.configuration_hash,
+                            "data": rec.data,
+                            "threshold": rec.threshold,
+                            "trace_path": rec.trace_path or "",
+                        }
+                    )
 
         return results
diff --git a/pysatl_cpd/benchmark/metrics/classification/classification_report.py b/pysatl_cpd/benchmark/metrics/classification/classification_report.py
index be78890..f1ff239 100644
--- a/pysatl_cpd/benchmark/metrics/classification/classification_report.py
+++ b/pysatl_cpd/benchmark/metrics/classification/classification_report.py
@@ -57,6 +57,4 @@ def aggregate(self, values: Sequence[dict[str, float]]) -> dict[str, float]:
         recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
         f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
 
-        print(total_fp)
-
         return {"tp": total_tp, "fp": total_fp, "fn": total_fn, "precision": precision, "recall": recall, "f1": f1}

From 39c690322b2be0ddccafde985424416c25fea032 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Mon, 13 Apr 2026 18:31:28 +0300
Subject: [PATCH 07/15] feat: add threshold policy for NoResetBenchmark

---
 .../benchmark/noreset/threshold_policy.py     | 249 ++++++++++-
 .../noreset/test_threshold_policy.py          | 416 ++++++++++++++++++
 2 files changed, 656 insertions(+), 9 deletions(-)
 create mode 100644 tests/unit/benchmark/noreset/test_threshold_policy.py

diff --git a/pysatl_cpd/benchmark/noreset/threshold_policy.py b/pysatl_cpd/benchmark/noreset/threshold_policy.py
index 5c83338..cae77e4 100644
--- a/pysatl_cpd/benchmark/noreset/threshold_policy.py
+++ b/pysatl_cpd/benchmark/noreset/threshold_policy.py
@@ -1,45 +1,276 @@
+# pysatl_cpd/benchmark/noreset/threshold_policy.py
+
+"""
+Threshold policies for signal extraction in NoReset benchmark.
+
+This module provides the ThresholdPolicy protocol and two concrete
+implementations: PointBasedPolicy and EventBasedPolicy.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from collections.abc import Sequence
-from typing import Protocol, runtime_checkable
+from typing import Protocol, cast, runtime_checkable
+
+import numpy as np
 
 from pysatl_cpd.core.typedefs import UnivariateNumericArray
 
 
 @runtime_checkable
 class ThresholdPolicy(Protocol):
+    """
+    Protocol for signal extraction from a detection function.
+
+    Implementations define how to convert a raw detection function array
+    into a list of signal indices given a threshold and known change points.
+    """
+
     def apply(
         self,
         detection_function: UnivariateNumericArray,
         threshold: float,
-        change_points: Sequence[int],  # true, 1-based
-    ) -> list[int]: ...  # 1-based signal indices
+        change_points: Sequence[int],
+    ) -> list[int]:
+        """
+        Extract signal indices from the detection function.
+
+        Parameters
+        ----------
+        detection_function : UnivariateNumericArray
+            Array of detection statistic values, one per time step.
+        threshold : float
+            Detection threshold.
+        change_points : Sequence[int]
+            True change point indices (1-based). Used by some policies
+            to define delay windows.
+
+        Returns
+        -------
+        list[int]
+            1-based indices where signals were detected.
+        """
+        ...
 
 
 class PointBasedPolicy:
+    """
+    Signal extraction policy based on point-wise threshold comparison.
+
+    Any position where the detection function satisfies the threshold
+    condition is considered a signal. The change_points argument is
+    accepted for interface compatibility but is ignored.
+
+    Parameters
+    ----------
+    strict : bool, default=True
+        If True, signal condition is detection_function > threshold.
+        If False, signal condition is detection_function >= threshold.
+    """
+
     def __init__(self, strict: bool = True) -> None:
-        return
+        self.strict = strict
+
+    @staticmethod
+    def _exceeds(arr: np.ndarray, threshold: float, strict: bool) -> np.ndarray:
+        """
+        Check whether array values exceed threshold.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            Array of values to check.
+        threshold : float
+            Threshold value.
+        strict : bool
+            If True, uses strict inequality (>).
+            If False, uses non-strict inequality (>=).
+
+        Returns
+        -------
+        np.ndarray
+            Boolean array.
+        """
+        return arr > threshold if strict else arr >= threshold
 
     def apply(
         self,
         detection_function: UnivariateNumericArray,
         threshold: float,
-        change_points: Sequence[int],  # true, 1-based
+        change_points: Sequence[int],
     ) -> list[int]:
-        raise NotImplementedError("Method `apply` is not implemented yet.")
+        """
+        Return 1-based indices where detection function exceeds threshold.
+
+        Parameters
+        ----------
+        detection_function : UnivariateNumericArray
+            Array of detection statistic values.
+        threshold : float
+            Detection threshold.
+        change_points : Sequence[int]
+            Ignored. Present for interface compatibility.
+
+        Returns
+        -------
+        list[int]
+            Sorted list of 1-based signal indices.
+        """
+        if len(detection_function) == 0:
+            return []
+
+        res = (np.where(self._exceeds(detection_function, threshold, self.strict))[0] + 1).tolist()
+        return cast(list[int], res)
 
 
 class EventBasedPolicy:
+    """
+    Signal extraction policy based on rising-edge detection with delay windows.
+
+    In normal (edge) mode, a signal is produced only when the detection
+    function crosses the threshold from below (rising edge). Inside delay
+    windows [true_cp, true_cp + max_delay] (1-based, inclusive), the policy
+    switches to point-based mode to correctly capture detection delay.
+
+    The previous value used for edge detection (prev) is tracked continuously,
+    including values inside delay windows (variant A). This means that if the
+    detection function is above threshold at the end of a window, the first
+    element after the window will not produce an edge signal.
+
+    For the first element, prev is treated as -inf (always below threshold).
+
+    Parameters
+    ----------
+    max_delay : int
+        Maximum allowable detection delay. Defines the right boundary of
+        the delay window as true_cp + max_delay (inclusive). Must be >= 0.
+    strict_edge : bool, default=True
+        If True, rising edge condition requires detection_function > threshold.
+        If False, condition is detection_function >= threshold.
+        prev is always checked with strict inequality (prev < threshold).
+    strict_point : bool, default=True
+        If True, point-based condition in delay window is
+        detection_function > threshold.
+        If False, condition is detection_function >= threshold.
+
+    Raises
+    ------
+    ValueError
+        If max_delay is negative.
+    """
+
     def __init__(
         self,
         max_delay: int,
         strict_edge: bool = True,
         strict_point: bool = True,
     ) -> None:
-        return
+        if max_delay < 0:
+            raise ValueError(f"max_delay must be non-negative, got {max_delay}")
+        self.max_delay = max_delay
+        self.strict_edge = strict_edge
+        self.strict_point = strict_point
+
+    @staticmethod
+    def _exceeds(arr: np.ndarray, threshold: float, strict: bool) -> np.ndarray:
+        """
+        Check whether array values exceed threshold.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            Array of values to check.
+        threshold : float
+            Threshold value.
+        strict : bool
+            If True, uses strict inequality (>).
+            If False, uses non-strict inequality (>=).
+
+        Returns
+        -------
+        np.ndarray
+            Boolean array.
+        """
+        return arr > threshold if strict else arr >= threshold
+
+    def _build_window_mask(
+        self,
+        length: int,
+        change_points: Sequence[int],
+    ) -> np.ndarray:
+        """
+        Build a boolean mask indicating which 0-based indices are in delay windows.
+
+        Uses a cumsum trick (vectorized). NOTE: out-of-range change points are clipped into [1, length].
+
+        Parameters
+        ----------
+        length : int
+            Length of the detection function array.
+        change_points : Sequence[int]
+            True change point indices (1-based).
+
+        Returns
+        -------
+        np.ndarray
+            Boolean array of shape (length,) where True means the position
+            is inside a delay window.
+        """
+        if not change_points:
+            return np.zeros(length, dtype=bool)
+
+        lefts = np.clip(np.array(change_points, dtype=int) - 1, 0, length - 1)
+        rights = np.clip(lefts + self.max_delay, 0, length - 1)
+
+        marker = np.zeros(length + 1, dtype=int)
+        np.add.at(marker, lefts, 1)
+        np.add.at(marker, rights + 1, -1)
+        return np.cumsum(marker)[:length] > 0
 
     def apply(
         self,
         detection_function: UnivariateNumericArray,
         threshold: float,
-        change_points: Sequence[int],  # true, 1-based
+        change_points: Sequence[int],
     ) -> list[int]:
-        raise NotImplementedError("Method `apply` is not implemented yet.")
+        """
+        Extract signal indices using rising-edge detection with delay windows.
+
+        Fully vectorized implementation using numpy masks.
+
+        Parameters
+        ----------
+        detection_function : UnivariateNumericArray
+            Array of detection statistic values.
+        threshold : float
+            Detection threshold.
+        change_points : Sequence[int]
+            True change point indices (1-based). Used to define delay windows
+            where point-based mode is applied.
+
+        Returns
+        -------
+        list[int]
+            Sorted list of 1-based signal indices.
+        """
+        n = len(detection_function)
+        if n == 0:
+            return []
+
+        window_mask = self._build_window_mask(n, change_points)
+
+        # prev[i] = df[i-1], prev[0] = -inf
+        prev = np.empty(n, dtype=detection_function.dtype)
+        prev[0] = float("-inf")
+        prev[1:] = detection_function[:-1]
+
+        # edge signals: rising edge outside windows
+        edge = (prev < threshold) & self._exceeds(detection_function, threshold, self.strict_edge) & ~window_mask
+
+        # point signals: threshold exceeded inside windows
+        point = self._exceeds(detection_function, threshold, self.strict_point) & window_mask
+
+        res = (np.where(edge | point)[0] + 1).tolist()
+        return cast(list[int], res)
diff --git a/tests/unit/benchmark/noreset/test_threshold_policy.py b/tests/unit/benchmark/noreset/test_threshold_policy.py
new file mode 100644
index 0000000..cfd1ad4
--- /dev/null
+++ b/tests/unit/benchmark/noreset/test_threshold_policy.py
@@ -0,0 +1,416 @@
+# tests/unit/benchmark/noreset/test_threshold_policy.py
+
+"""Tests for ThresholdPolicy implementations."""
+
+import numpy as np
+import pytest
+
+from pysatl_cpd.benchmark.noreset.threshold_policy import (
+    EventBasedPolicy,
+    PointBasedPolicy,
+    ThresholdPolicy,
+)
+from pysatl_cpd.core.typedefs import UnivariateNumericArray
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def make_df(*values: float) -> UnivariateNumericArray:
+    """Create a UnivariateNumericArray from float values."""
+    return np.array(values, dtype=np.float64)
+
+
+# ---------------------------------------------------------------------------
+# TestThresholdProtocol
+# ---------------------------------------------------------------------------
+
+
+class TestThresholdProtocol:
+    """Tests that concrete policies satisfy the ThresholdPolicy protocol."""
+
+    def test_point_based_implements_protocol(self) -> None:
+        """PointBasedPolicy must be recognised as ThresholdPolicy at runtime."""
+        policy: PointBasedPolicy = PointBasedPolicy()
+        assert isinstance(policy, ThresholdPolicy)
+
+    def test_event_based_implements_protocol(self) -> None:
+        """EventBasedPolicy must be recognised as ThresholdPolicy at runtime."""
+        policy: EventBasedPolicy = EventBasedPolicy(max_delay=5)
+        assert isinstance(policy, ThresholdPolicy)
+
+
+# ---------------------------------------------------------------------------
+# TestPointBasedPolicyInit
+# ---------------------------------------------------------------------------
+
+
+class TestPointBasedPolicyInit:
+    """Tests for PointBasedPolicy constructor."""
+
+    def test_default_strict_is_true(self) -> None:
+        """Default strict parameter must be True."""
+        policy: PointBasedPolicy = PointBasedPolicy()
+        assert policy.strict is True
+
+    def test_explicit_strict_false(self) -> None:
+        """Explicit strict=False must be stored correctly."""
+        policy: PointBasedPolicy = PointBasedPolicy(strict=False)
+        assert policy.strict is False
+
+
+# ---------------------------------------------------------------------------
+# TestPointBasedPolicyApply
+# ---------------------------------------------------------------------------
+
+
+class TestPointBasedPolicyApply:
+    """Tests for PointBasedPolicy.apply — parametrized over common cases."""
+
+    @pytest.mark.parametrize(
+        "values, threshold, strict, change_points, expected",
+        [
+            # no signals — all below threshold
+            ([0.1, 0.2, 0.3], 1.0, True, [], []),
+            # all signals — all strictly above threshold
+            ([2.0, 3.0, 4.0], 1.0, True, [], [1, 2, 3]),
+            # strict=True: equal value is NOT a signal
+            ([1.0, 2.0, 1.0], 1.0, True, [], [2]),
+            # strict=False: equal value IS a signal
+            ([1.0, 2.0, 0.5], 1.0, False, [], [1, 2]),
+            # empty detection function
+            ([], 1.0, True, [], []),
+            # single element — signal
+            ([5.0], 1.0, True, [], [1]),
+            # single element — no signal
+            ([0.5], 1.0, True, [], []),
+            # indices are 1-based
+            ([0.0, 0.0, 5.0, 0.0, 5.0], 1.0, True, [], [3, 5]),
+            # change_points present but do not affect result
+            ([2.0, 0.5, 2.0], 1.0, True, [2], [1, 3]),
+        ],
+        ids=[
+            "all_below",
+            "all_above_strict",
+            "strict_true_excludes_equal",
+            "strict_false_includes_equal",
+            "empty_df",
+            "single_signal",
+            "single_no_signal",
+            "returns_1based_indices",
+            "change_points_do_not_affect",
+        ],
+    )
+    def test_apply(
+        self,
+        values: list[float],
+        threshold: float,
+        strict: bool,
+        change_points: list[int],
+        expected: list[int],
+    ) -> None:
+        """
+        PointBasedPolicy.apply must return 1-based signal indices.
+
+        Any position where detection_function satisfies the threshold
+        condition (strict or non-strict) is a signal. change_points
+        are accepted but ignored.
+        """
+        policy: PointBasedPolicy = PointBasedPolicy(strict=strict)
+        df: UnivariateNumericArray = make_df(*values)
+        result: list[int] = policy.apply(df, threshold, change_points)
+        assert result == expected
+
+
+# ---------------------------------------------------------------------------
+# TestEventBasedPolicyInit
+# ---------------------------------------------------------------------------
+
+
+class TestEventBasedPolicyInit:
+    """Tests for EventBasedPolicy constructor."""
+
+    def test_valid_init_stores_fields(self) -> None:
+        """
+        Constructor must store max_delay, strict_edge, strict_point correctly.
+
+        Default strict_edge=True, strict_point=True.
+        """
+        policy: EventBasedPolicy = EventBasedPolicy(max_delay=5)
+        assert policy.max_delay == 5
+        assert policy.strict_edge is True
+        assert policy.strict_point is True
+
+    def test_explicit_strict_values_stored(self) -> None:
+        """Explicit strict_edge=False and strict_point=False must be stored."""
+        policy: EventBasedPolicy = EventBasedPolicy(
+            max_delay=3,
+            strict_edge=False,
+            strict_point=False,
+        )
+        assert policy.max_delay == 3
+        assert policy.strict_edge is False
+        assert policy.strict_point is False
+
+    def test_negative_max_delay_raises(self) -> None:
+        """Negative max_delay must raise ValueError."""
+        with pytest.raises(ValueError):
+            EventBasedPolicy(max_delay=-1)
+
+    def test_zero_max_delay_is_valid(self) -> None:
+        """max_delay=0 means only the change point itself is in the window."""
+        policy: EventBasedPolicy = EventBasedPolicy(max_delay=0)
+        assert policy.max_delay == 0
+
+
+# ---------------------------------------------------------------------------
+# TestEventBasedPolicyApplyEdgeMode
+# ---------------------------------------------------------------------------
+
+
+class TestEventBasedPolicyApplyEdgeMode:
+    """Tests for edge (rising-edge) detection mode — no delay windows active."""
+
+    @pytest.mark.parametrize(
+        "values, threshold, strict_edge, change_points, expected",
+        [
+            # basic rising edge detected
+            # idx3: prev=0.0<1.0, 2.0>1.0 -> signal
+            ([0.0, 0.0, 2.0, 2.0], 1.0, True, [], [3]),
+            # no repeat signal while staying above threshold
+            # idx2: rising edge -> signal, idx3,4: prev>=threshold -> no signal
+            ([0.0, 2.0, 3.0, 4.0], 1.0, True, [], [2]),
+            # falling then rising produces second signal
+            # idx2: rising [2], idx3: falling, idx4: rising [4]
+            ([0.0, 2.0, 0.0, 2.0], 1.0, True, [], [2, 4]),
+            # strict_edge=True: prev=0.5<1.0, curr=1.0, 1.0>1.0 False -> no signal
+            ([0.5, 1.0, 0.5], 1.0, True, [], []),
+            # strict_edge=False: prev=0.5<1.0, curr=1.0, 1.0>=1.0 True -> signal
+            ([0.5, 1.0, 0.5], 1.0, False, [], [2]),
+            # first element above threshold: prev=-inf<1.0, 2.0>1.0 -> signal
+            ([2.0, 0.0, 0.0], 1.0, True, [], [1]),
+            # first element equal threshold, strict=False: prev=-inf<1.0, 1.0>=1.0 -> signal
+            ([1.0, 0.0], 1.0, False, [], [1]),
+            # first element equal threshold, strict=True: 1.0>1.0 False -> no signal
+            ([1.0, 0.0], 1.0, True, [], []),
+            # returns 1-based indices
+            ([0.5, 2.0, 0.5], 1.0, True, [], [2]),
+            # empty detection function
+            ([], 1.0, True, [], []),
+        ],
+        ids=[
+            "basic_rising_edge",
+            "no_repeat_while_above",
+            "falling_then_rising",
+            "strict_edge_true_equal_not_signal",
+            "strict_edge_false_equal_is_signal",
+            "first_element_above_is_signal",
+            "first_element_equal_strict_false",
+            "first_element_equal_strict_true",
+            "returns_1based_indices",
+            "empty_df",
+        ],
+    )
+    def test_edge_mode(
+        self,
+        values: list[float],
+        threshold: float,
+        strict_edge: bool,
+        change_points: list[int],
+        expected: list[int],
+    ) -> None:
+        """
+        In edge mode (no delay windows), only rising-edge crossings are signals.
+
+        prev is -inf for the first element. strict_edge controls whether
+        the crossing condition uses strict (>) or non-strict (>=) inequality
+        for the current value. prev is always checked with strict (<).
+        """
+        policy: EventBasedPolicy = EventBasedPolicy(
+            max_delay=0,
+            strict_edge=strict_edge,
+            strict_point=True,
+        )
+        df: UnivariateNumericArray = make_df(*values)
+        result: list[int] = policy.apply(df, threshold, change_points)
+        assert result == expected
+
+
+# ---------------------------------------------------------------------------
+# TestEventBasedPolicyApplyDelayWindow
+# ---------------------------------------------------------------------------
+
+
+class TestEventBasedPolicyApplyDelayWindow:
+    """Tests for point-based mode inside delay windows [true_cp, true_cp + max_delay]."""
+
+    @pytest.mark.parametrize(
+        "values, threshold, change_points, max_delay, strict_point, expected",
+        [
+            # all above in window — all are signals
+            # cp=3, max_delay=2 -> window [3,5] (1-based, inclusive)
+            # idx3=2.0, idx4=2.0, idx5=2.0 -> all signals
+            ([0.0, 0.0, 2.0, 2.0, 2.0], 1.0, [3], 2, True, [3, 4, 5]),
+            # partial signals in window
+            # idx3=2.0 signal, idx4=0.5 no, idx5=2.0 signal
+            ([0.0, 0.0, 2.0, 0.5, 2.0], 1.0, [3], 2, True, [3, 5]),
+            # strict_point=True: equal not a signal in window
+            # window [3,5], idx3=1.0, idx4=1.0: 1.0>1.0 False -> no signals
+            ([0.0, 0.0, 1.0, 1.0, 0.0], 1.0, [3], 2, True, []),
+            # strict_point=False: equal IS a signal in window
+            # window [3,5], idx3=1.0, idx4=1.0: 1.0>=1.0 True -> signals
+            ([0.0, 0.0, 1.0, 1.0, 0.0], 1.0, [3], 2, False, [3, 4]),
+            # max_delay=0: window is just [cp, cp] — single point
+            # cp=3, window={3}, idx3=2.0 -> signal
+            ([0.0, 0.0, 2.0, 0.0], 1.0, [3], 0, True, [3]),
+            # right boundary is INCLUSIVE: cp=3, max_delay=2 -> idx5 in window
+            ([0.0, 0.0, 0.0, 0.0, 2.0], 1.0, [3], 2, True, [5]),
+            # two change points — two windows
+            # cp=[2,5], max_delay=1 -> windows [2,3] and [5,6]
+            # idx2=2.0, idx3=2.0, idx5=2.0, idx6=2.0 -> all signals
+            ([0.0, 2.0, 2.0, 0.0, 2.0, 2.0], 1.0, [2, 5], 1, True, [2, 3, 5, 6]),
+        ],
+        ids=[
+            "all_above_in_window",
+            "partial_signals_in_window",
+            "strict_point_true_equal_not_signal",
+            "strict_point_false_equal_is_signal",
+            "max_delay_zero_single_point",
+            "right_boundary_inclusive",
+            "two_change_points_two_windows",
+        ],
+    )
+    def test_delay_window(
+        self,
+        values: list[float],
+        threshold: float,
+        change_points: list[int],
+        max_delay: int,
+        strict_point: bool,
+        expected: list[int],
+    ) -> None:
+        """
+        Inside [true_cp, true_cp + max_delay] policy uses point-based mode.
+
+        strict_point controls whether equal values are signals.
+        Right boundary is inclusive. change_points are 1-based.
+        """
+        policy: EventBasedPolicy = EventBasedPolicy(
+            max_delay=max_delay,
+            strict_edge=True,
+            strict_point=strict_point,
+        )
+        df: UnivariateNumericArray = make_df(*values)
+        result: list[int] = policy.apply(df, threshold, change_points)
+        assert result == expected
+
+
+# ---------------------------------------------------------------------------
+# TestEventBasedPolicyApplyMixed
+# ---------------------------------------------------------------------------
+
+
+class TestEventBasedPolicyApplyMixed:
+    """Tests combining edge mode and delay windows in the same series."""
+
+    @pytest.mark.parametrize(
+        "values, threshold, change_points, max_delay, strict_edge, strict_point, expected",
+        [
+            # edge signal before window, point-based inside window
+            # df=[0.0, 2.0, 0.0, 0.0, 2.0, 2.0], cp=[4], max_delay=1
+            # window=[4,5]
+            # idx1: edge, 0.0<=1.0 -> no
+            # idx2: edge, prev=0.0<1.0, 2.0>1.0 -> signal [2]
+            # idx3: edge, prev=2.0>=1.0 -> no (not rising)
+            # idx4: window, 0.0<=1.0 -> no
+            # idx5: window, 2.0>1.0 -> signal [5]
+            # idx6: edge, prev=df[4]=2.0>=1.0 (variant A) -> no
+            (
+                [0.0, 2.0, 0.0, 0.0, 2.0, 2.0],
+                1.0,
+                [4],
+                1,
+                True,
+                True,
+                [2, 5],
+            ),
+            # signal before window is independent of window detection
+            # df=[0.0, 2.0, 0.0, 2.0], cp=[4], max_delay=1
+            # window=[4,4] (df length=4, so only idx4)
+            # idx2: edge -> signal [2]
+            # idx4: window, 2.0>1.0 -> signal [4]
+            (
+                [0.0, 2.0, 0.0, 2.0],
+                1.0,
+                [4],
+                1,
+                True,
+                True,
+                [2, 4],
+            ),
+            # edge resets correctly after window (variant A: prev=last window value)
+            # df=[0.0, 0.0, 2.0, 0.0, 0.0, 2.0], cp=[3], max_delay=0
+            # window={3}
+            # idx3: window, 2.0>1.0 -> signal [3]
+            # idx4: edge, prev=df[2]=2.0>=1.0 -> no (not rising)
+            # idx5: edge, prev=0.0<1.0, 0.0<=1.0 -> no
+            # idx6: edge, prev=0.0<1.0, 2.0>1.0 -> signal [6]
+            (
+                [0.0, 0.0, 2.0, 0.0, 0.0, 2.0],
+                1.0,
+                [3],
+                0,
+                True,
+                True,
+                [3, 6],
+            ),
+            # after window: value stays above threshold — no edge signal (variant A)
+            # df=[0.0, 0.0, 2.0, 2.0, 2.0], cp=[3], max_delay=1
+            # window=[3,4]
+            # idx3: window, 2.0>1.0 -> signal [3]
+            # idx4: window, 2.0>1.0 -> signal [4]
+            # idx5: edge, prev=df[3]=2.0>=1.0 (variant A) -> no signal
+            (
+                [0.0, 0.0, 2.0, 2.0, 2.0],
+                1.0,
+                [3],
+                1,
+                True,
+                True,
+                [3, 4],
+            ),
+        ],
+        ids=[
+            "edge_before_and_point_inside_window",
+            "signal_before_window_independent",
+            "edge_resets_after_window",
+            "after_window_above_no_edge_signal",
+        ],
+    )
+    def test_mixed(
+        self,
+        values: list[float],
+        threshold: float,
+        change_points: list[int],
+        max_delay: int,
+        strict_edge: bool,
+        strict_point: bool,
+        expected: list[int],
+    ) -> None:
+        """
+        Edge mode and delay windows must work correctly together.
+
+        prev (for edge detection) tracks the last seen value including
+        values inside the window (variant A). This means that if the
+        detection function is above threshold at the end of a window,
+        the first element after the window will NOT produce an edge signal.
+        """
+        policy: EventBasedPolicy = EventBasedPolicy(
+            max_delay=max_delay,
+            strict_edge=strict_edge,
+            strict_point=strict_point,
+        )
+        df: UnivariateNumericArray = make_df(*values)
+        result: list[int] = policy.apply(df, threshold, change_points)
+        assert result == expected

From 6803c09ea0fb78f788039852a97f91b845478667 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Mon, 13 Apr 2026 22:30:22 +0300
Subject: [PATCH 08/15] feat: add NoResetDetectionTrace

---
 .../noreset/noreset_detection_trace.py        |  75 +++++++-
 .../noreset/test_noreset_detection_trace.py   | 171 ++++++++++++++++++
 2 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/benchmark/noreset/test_noreset_detection_trace.py

diff --git a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
index 3e34f98..acc1e9a 100644
--- a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
+++ b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py
@@ -1,8 +1,40 @@
+# -*- coding: ascii -*-
+
+"""
+NoReset detection trace container.
+
+This module provides NoResetDetectionTrace - a lightweight trace produced
+by applying a ThresholdPolicy to a pre-computed infinite-threshold trace,
+avoiding redundant solver executions.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from typing import cast
+
+import numpy as np
+
 from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState
 from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from pysatl_cpd.core.typedefs import UnivariateNumericArray
 
 
 class NoResetDetectionTrace[StateT: OnlineAlgorithmState](OnlineDetectionTrace[StateT]):
+    """
+    Detection trace produced by the NoReset benchmark strategy.
+
+    Instead of re-running the solver for every threshold, a single
+    infinite-threshold trace is computed once and this class wraps it
+    with new detected change points obtained by applying a ThresholdPolicy.
+
+    Auxiliary fields (processing_time, algorithm_states, skip_periods,
+    learning_periods, forced_change_points, signal_change_points) are
+    intentionally left empty - only detection_function and
+    detected_change_points carry meaningful data.
+    """
+
     @classmethod
     def from_inf_trace(
         cls,
@@ -10,4 +42,45 @@ def from_inf_trace(
         detected_change_points: list[int],
         threshold: float,
     ) -> "NoResetDetectionTrace[StateT]":
-        raise NotImplementedError("Method 'from_inf_trace' is not implemented yet.")
+        """
+        Construct a NoResetDetectionTrace from an infinite-threshold trace.
+
+        Copies detection_function, algorithm_name, and configuration_hash
+        from source_trace. Apart from the given arguments, other fields are empty.
+
+        Parameters
+        ----------
+        source_trace : OnlineDetectionTrace[StateT]
+            The pre-computed trace obtained by running the solver with
+            threshold=inf. Its detection_function is reused for all
+            threshold simulations.
+        detected_change_points : list[int]
+            Change point indices produced by applying a ThresholdPolicy
+            to source_trace.detection_function at a specific threshold.
+        threshold : float
+            The threshold value used to extract detected_change_points.
+
+        Returns
+        -------
+        NoResetDetectionTrace[StateT]
+            A new trace with the given change points and copied
+            detection function.
+        """
+        empty_processing_time: UnivariateNumericArray = cast(
+            UnivariateNumericArray,
+            np.array([], dtype=np.float64),
+        )
+
+        return cls(
+            algorithm_name=source_trace.algorithm_name,
+            configuration_hash=source_trace.configuration_hash,
+            detected_change_points=detected_change_points,
+            threshold=threshold,
+            detection_function=source_trace.detection_function.copy(),
+            processing_time=empty_processing_time,
+            algorithm_states=[],
+            skip_periods=[],
+            learning_periods=[],
+            forced_change_points=[],
+            signal_change_points=[],
+        )
diff --git a/tests/unit/benchmark/noreset/test_noreset_detection_trace.py b/tests/unit/benchmark/noreset/test_noreset_detection_trace.py
new file mode 100644
index 0000000..588ce46
--- /dev/null
+++ b/tests/unit/benchmark/noreset/test_noreset_detection_trace.py
@@ -0,0 +1,171 @@
+# -*- coding: ascii -*-
+
+"""
+Unit tests for NoResetDetectionTrace[Any].
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from typing import Any
+
+import pytest
+
+from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace
+from pysatl_cpd.core.detection_trace import DetectionTrace
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
+
+
+@pytest.fixture
+def source_trace() -> MockOnlineDetectionTrace:
+    """
+    Source MockOnlineDetectionTrace with detected change points [5, 10].
+    """
+    trace = MockOnlineDetectionTrace(detected_change_points=[5, 10])
+    return trace
+
+
+@pytest.fixture
+def new_change_points() -> list[int]:
+    """New detected change points to assign to NoResetDetectionTrace[Any]."""
+    return [3, 7]
+
+
+class TestNoResetDetectionTraceFromInfTrace:
+    """Tests for NoResetDetectionTrace[Any].from_inf_trace factory method."""
+
+    def test_detected_change_points_and_threshold_are_set(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """New detected_change_points and threshold are stored correctly."""
+        threshold: float = 1.0
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=threshold,
+        )
+        assert list(trace.detected_change_points) == new_change_points
+        assert trace.threshold == threshold
+
+    def test_algorithm_name_and_configuration_hash_are_copied(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """algorithm_name and configuration_hash are copied from source_trace."""
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=1.0,
+        )
+        assert trace.algorithm_name == source_trace.algorithm_name
+        assert trace.configuration_hash == source_trace.configuration_hash
+
+    def test_auxiliary_fields_are_empty(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """detection_function matches source_trace in length; processing_time, algorithm_states,
+        skip_periods, learning_periods, forced_change_points, signal_change_points are empty."""
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=1.0,
+        )
+        assert len(trace.processing_time) == 0
+        assert len(trace.detection_function) == len(source_trace.detection_function)  # copied by from_inf_trace
+        assert trace.algorithm_states == []
+        assert trace.skip_periods == []
+        assert trace.learning_periods == []
+        assert trace.forced_change_points == []
+        assert trace.signal_change_points == []
+
+    def test_source_trace_is_not_mutated(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """source_trace fields are not modified after from_inf_trace call."""
+        original_cps: list[int] = list(source_trace.detected_change_points)
+        original_name: str = source_trace.algorithm_name
+        original_hash: int = source_trace.configuration_hash
+
+        NoResetDetectionTrace[Any].from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=1.0,
+        )
+
+        assert list(source_trace.detected_change_points) == original_cps
+        assert source_trace.algorithm_name == original_name
+        assert source_trace.configuration_hash == original_hash
+
+    def test_with_empty_detected_change_points(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+    ) -> None:
+        """from_inf_trace works correctly when detected_change_points is empty."""
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=[],
+            threshold=1.0,
+        )
+        assert list(trace.detected_change_points) == []
+
+    def test_with_boundary_threshold_values(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """from_inf_trace works correctly with threshold=0.0 and threshold=inf."""
+        trace_zero: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=0.0,
+        )
+        assert trace_zero.threshold == 0.0
+
+        trace_inf: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=float("inf"),
+        )
+        assert trace_inf.threshold == float("inf")
+
+
+class TestNoResetDetectionTraceInheritance:
+    """Tests for NoResetDetectionTrace[Any] inheritance chain."""
+
+    def test_is_instance_of_expected_base_classes(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """NoResetDetectionTrace[Any] is an instance of OnlineDetectionTrace and DetectionTrace."""
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=1.0,
+        )
+        assert isinstance(trace, NoResetDetectionTrace)
+        assert isinstance(trace, OnlineDetectionTrace)
+        assert isinstance(trace, DetectionTrace)
+
+    def test_detected_change_points_accessible_via_base_property(
+        self,
+        source_trace: MockOnlineDetectionTrace,
+        new_change_points: list[int],
+    ) -> None:
+        """detected_change_points are accessible through the base class property."""
+        trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace(
+            source_trace=source_trace,
+            detected_change_points=new_change_points,
+            threshold=1.0,
+        )
+        base: DetectionTrace = trace
+        assert list(base.detected_change_points) == new_change_points

From 24d35beab6fd64efca9ac7eda9822bf1c91eadbc Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Mon, 13 Apr 2026 23:07:15 +0300
Subject: [PATCH 09/15] feat: add abstract OnlineBenchmarkRunner

---
 pysatl_cpd/benchmark/arl_benchmark_runner.py  |   5 +-
 .../benchmark/core/benchmark_executor.py      |  12 +-
 .../noreset/noreset_benchmark_runner.py       |   7 +-
 .../benchmark/online_benchmark_runner.py      | 100 +++-
 .../benchmark/reset_benchmark_runner.py       |   7 +-
 .../mocks/benchmark/mock_benchmark_runner.py  |  92 +++
 .../benchmark/test_online_benchmark_runner.py | 523 ++++++++++++++++++
 7 files changed, 725 insertions(+), 21 deletions(-)
 create mode 100644 tests/mocks/benchmark/mock_benchmark_runner.py
 create mode 100644 tests/unit/benchmark/test_online_benchmark_runner.py

diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
index 8646aa4..d14069f 100644
--- a/pysatl_cpd/benchmark/arl_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -13,7 +14,7 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa
 ):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
         providers: list[ProviderT],
         solver: OnlineCpdSolver,
         dump_dir: Path | None = None,
@@ -24,6 +25,6 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py
index 15b6ddb..fe7bee0 100644
--- a/pysatl_cpd/benchmark/core/benchmark_executor.py
+++ b/pysatl_cpd/benchmark/core/benchmark_executor.py
@@ -79,11 +79,11 @@ class BenchmarkExecutor[DataT]:
 
     Parameters
     ----------
-    algorithms : list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
-        A list of tuples, where each tuple contains an instantiated online
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        A sequence of tuples, where each tuple contains an instantiated online
         algorithm and a sequence of thresholds to test it against.
-    providers : list[DataProvider[DataT]]
-        A list of data providers to be fed into the algorithms.
+    providers : Sequence[DataProvider[DataT]]
+        A sequence of data providers to be fed into the algorithms.
     solver : OnlineCpdSolver
         The solver instance responsible for iterating over the data providers
         and running the algorithmic logic.
@@ -94,8 +94,8 @@ class BenchmarkExecutor[DataT]:
 
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
-        providers: list[DataProvider[DataT]],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[DataProvider[DataT]],
         solver: OnlineCpdSolver,
         dump_dir: str | Path | None = None,
     ) -> None:
diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
index f1af0d0..1b36eae 100644
--- a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -14,8 +15,8 @@
 class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]],
         solver: OnlineCpdSolver,
         policy: ThresholdPolicy,
@@ -27,7 +28,7 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]:
         raise NotImplementedError("Method '_collect_runs' is not implemented yet.")
 
diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py
index 03391df..b5edc46 100644
--- a/pysatl_cpd/benchmark/online_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/online_benchmark_runner.py
@@ -1,5 +1,15 @@
-# online_runner.py
+# -*- coding: ascii -*-
+
+"""
+Abstract base class for online benchmark runners.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from abc import ABC, abstractmethod
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -11,26 +21,102 @@
 
 
 class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](ABC):
+    """
+    Abstract base class for online benchmark runners.
+
+    Organises the evaluation loop over algorithms and thresholds,
+    delegates data collection to subclasses via _collect_runs(), and
+    applies all registered metrics to each batch of runs.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs to evaluate.
+    providers : Sequence[ProviderT]
+        Sequence of labeled data providers.
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        Named metrics to evaluate for each (algorithm, threshold) batch.
+    solver : OnlineCpdSolver
+        Solver used to run algorithms against providers.
+    dump_dir : Path | str | None, optional
+        Directory for caching results via BenchmarkExecutor.
+        If None, caching is disabled. Default is None.
+    """
+
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
-        dump_dir: Path | None = None,
+        dump_dir: Path | str | None = None,
     ) -> None:
-        return
+        self._algorithms = algorithms
+        self._providers = providers
+        self._metrics = metrics
+        self._solver = solver
+        self._dump_dir = Path(dump_dir) if isinstance(dump_dir, str) else dump_dir
 
     @abstractmethod
     def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
+        """
+        Collect (trace, provider) pairs for a given algorithm and threshold.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm to evaluate.
+        threshold : float
+            The detection threshold.
+        providers : Sequence[ProviderT]
+            Sequence of data providers to run against.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            Batch of (trace, provider) pairs for metric evaluation.
+        """
+
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
 
     def run(
         self,
     ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]:
-        raise NotImplementedError("Method `run` is not implemented yet.")
+        """
+        Execute the benchmark over all algorithms and thresholds.
+
+        For each (algorithm, threshold) pair, collects runs via
+        _collect_runs() and evaluates all registered metrics.
+
+        Returns
+        -------
+        dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]
+            Mapping of (algorithm_name, configuration) to a list of
+            (threshold, {metric_name: metric_value}) entries, one per threshold.
+        """
+
+        results: dict[
+            tuple[str, OnlineAlgorithmConfiguration],
+            list[tuple[float, dict[str, Any]]],
+        ] = {}
+
+        for algorithm, thresholds in self._algorithms:
+            key: tuple[str, OnlineAlgorithmConfiguration] = (
+                str(algorithm),
+                algorithm.configuration,
+            )
+            results[key] = []
+
+            for threshold in thresholds:
+                runs = self._collect_runs(algorithm, threshold, self._providers)
+
+                metric_values: dict[str, Any] = {name: metric.evaluate(runs) for name, metric in self._metrics.items()}
+
+                results[key].append((threshold, metric_values))
+
+        return results
diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py
index 960044b..87491df 100644
--- a/pysatl_cpd/benchmark/reset_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py
@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
@@ -14,8 +15,8 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labeled
 ):
     def __init__(
         self,
-        algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]],
-        providers: list[ProviderT],
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
         dump_dir: Path | None = None,
@@ -26,6 +27,6 @@ def _collect_runs(
         self,
         algorithm: OnlineAlgorithm[Any, Any, Any],
         threshold: float,
-        providers: list[ProviderT],
+        providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
         raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
diff --git a/tests/mocks/benchmark/mock_benchmark_runner.py b/tests/mocks/benchmark/mock_benchmark_runner.py
new file mode 100644
index 0000000..f69db6a
--- /dev/null
+++ b/tests/mocks/benchmark/mock_benchmark_runner.py
@@ -0,0 +1,92 @@
+# -*- coding: ascii -*-
+
+"""
+Mock OnlineBenchmarkRunner for testing.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class MockBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
+    OnlineBenchmarkRunner[TraceT, ProviderT]
+):
+    """
+    Mock implementation of OnlineBenchmarkRunner for testing.
+
+    Records all _collect_runs calls for assertion in tests.
+    Returns a pre-configured list of runs for each call.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs.
+    providers : Sequence[ProviderT]
+        Sequence of data providers.
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        Dictionary of metrics to evaluate.
+    solver : OnlineCpdSolver
+        Solver instance.
+    dump_dir : Path | str | None, optional
+        Directory for caching results.
+    runs_to_return : list[tuple[TraceT, ProviderT]] | None, optional
+        Pre-configured runs returned by _collect_runs.
+        If None, returns empty list.
+    """
+
+    def __init__(
+        self,
+        algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
+        providers: Sequence[ProviderT],
+        metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
+        solver: OnlineCpdSolver,
+        dump_dir: Path | str | None = None,
+        runs_to_return: list[tuple[TraceT, ProviderT]] | None = None,
+    ) -> None:
+        super().__init__(
+            algorithms=algorithms,
+            providers=providers,
+            metrics=metrics,
+            solver=solver,
+            dump_dir=dump_dir,
+        )
+        self._runs_to_return: list[tuple[TraceT, ProviderT]] = runs_to_return or []
+        self.collect_runs_calls: list[tuple[OnlineAlgorithm[Any, Any, Any], float, Sequence[ProviderT]]] = []
+
+    def _collect_runs(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        threshold: float,
+        providers: Sequence[ProviderT],
+    ) -> list[tuple[TraceT, ProviderT]]:
+        """
+        Record the call and return pre-configured runs.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm being evaluated.
+        threshold : float
+            The detection threshold.
+        providers : Sequence[ProviderT]
+            Sequence of data providers.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            Pre-configured runs set at construction time.
+        """
+        self.collect_runs_calls.append((algorithm, threshold, providers))
+        return self._runs_to_return
diff --git a/tests/unit/benchmark/test_online_benchmark_runner.py b/tests/unit/benchmark/test_online_benchmark_runner.py
new file mode 100644
index 0000000..84fe318
--- /dev/null
+++ b/tests/unit/benchmark/test_online_benchmark_runner.py
@@ -0,0 +1,523 @@
+# -*- coding: ascii -*-
+
+"""
+Unit tests for OnlineBenchmarkRunner.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from collections.abc import Sequence
+from pathlib import Path
+
+import pytest
+
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.typedefs import Number
+from tests.mocks.algorithms.online import MockOnlineAlgorithm
+from tests.mocks.analysis.labeled_data import MockLabeledData
+from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric
+from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric
+from tests.mocks.benchmark.mock_benchmark_runner import MockBenchmarkRunner
+from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def solver() -> OnlineCpdSolver:
+    """Default OnlineCpdSolver with no special configuration."""
+    return OnlineCpdSolver()
+
+
+@pytest.fixture
+def single_algorithm() -> MockOnlineAlgorithm[Number]:
+    """Single mock algorithm with return_sequence=[0.5]."""
+    return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5])
+
+
+@pytest.fixture
+def two_algorithms() -> list[MockOnlineAlgorithm[Number]]:
+    """Two mock algorithms with different names and return sequences."""
+    return [
+        MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5]),
+        MockOnlineAlgorithm[Number](name="AlgoB", return_sequence=[1.5]),
+    ]
+
+
+@pytest.fixture
+def single_provider() -> MockLabeledData:
+    """Single labeled data provider with one change point."""
+    return MockLabeledData(change_points=[5], name="Provider1")
+
+
+@pytest.fixture
+def two_providers() -> list[MockLabeledData]:
+    """Two labeled data providers."""
+    return [
+        MockLabeledData(change_points=[5], name="Provider1"),
+        MockLabeledData(change_points=[10], name="Provider2"),
+    ]
+
+
+@pytest.fixture
+def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]:
+    """Single mock aggregation metric."""
+    return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](base=MockRunMetric(return_values=[1.0]))
+
+
+@pytest.fixture
+def two_metrics() -> dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]]:
+    """Two named mock aggregation metrics."""
+    return {
+        "metric_a": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](
+            base=MockRunMetric(return_values=[1.0])
+        ),
+        "metric_b": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](
+            base=MockRunMetric(return_values=[2.0])
+        ),
+    }
+
+
+@pytest.fixture
+def single_run() -> list[tuple[MockOnlineDetectionTrace, MockLabeledData]]:
+    """Single pre-configured run for MockBenchmarkRunner."""
+    return [
+        (
+            MockOnlineDetectionTrace(detected_change_points=[5]),
+            MockLabeledData(change_points=[5], name="Provider1"),
+        )
+    ]
+
+
+def make_runner(
+    algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]],
+    providers: Sequence[MockLabeledData],
+    metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]],
+    solver: OnlineCpdSolver,
+    dump_dir: Path | str | None = None,
+    runs_to_return: list[tuple[MockOnlineDetectionTrace, MockLabeledData]] | None = None,
+) -> MockBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]:
+    """Helper to construct MockBenchmarkRunner with given parameters."""
+    return MockBenchmarkRunner(
+        algorithms=algorithms,
+        providers=providers,
+        metrics=metrics,  # type: ignore[arg-type]
+        solver=solver,
+        dump_dir=dump_dir,
+        runs_to_return=runs_to_return or [],
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestOnlineBenchmarkRunnerInit:
+    """Tests for OnlineBenchmarkRunner.__init__."""
+
+    def test_stores_algorithms_providers_metrics_solver(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """algorithms, providers, metrics and solver are stored on the runner unchanged."""
+        algorithms = [(single_algorithm, [1.0])]
+        providers = [single_provider]
+        metrics = {"m": mock_metric}
+
+        runner = make_runner(algorithms, providers, metrics, solver)
+
+        assert runner._algorithms == algorithms
+        assert runner._providers == providers
+        assert runner._metrics == metrics
+        assert runner._solver is solver
+
+    def test_dump_dir_defaults_to_none(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """dump_dir is None when not provided."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        assert runner._dump_dir is None
+
+    def test_dump_dir_as_string_is_converted_to_path(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """dump_dir passed as str is stored as Path."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=str(tmp_path),
+        )
+        assert isinstance(runner._dump_dir, Path)
+        assert runner._dump_dir == tmp_path
+
+    def test_dump_dir_as_path_is_stored_as_path(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """dump_dir passed as Path is stored as Path."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=tmp_path,
+        )
+        assert isinstance(runner._dump_dir, Path)
+        assert runner._dump_dir == tmp_path
+
+
+class TestOnlineBenchmarkRunnerAbstract:
+    """Tests for OnlineBenchmarkRunner abstract interface."""
+
+    def test_cannot_instantiate_directly(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """OnlineBenchmarkRunner cannot be instantiated directly."""
+        with pytest.raises(TypeError):
+            OnlineBenchmarkRunner(  # type: ignore[abstract]
+                algorithms=[(single_algorithm, [1.0])],
+                providers=[single_provider],
+                metrics={"m": mock_metric},
+                solver=solver,
+            )
+
+    def test_subclass_without_collect_runs_cannot_instantiate(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Subclass without _collect_runs implementation cannot be instantiated."""
+
+        class IncompleteRunner(OnlineBenchmarkRunner):  # type: ignore[type-arg]
+            pass
+
+        with pytest.raises(TypeError):
+            IncompleteRunner(  # type: ignore[abstract]
+                algorithms=[(single_algorithm, [1.0])],
+                providers=[single_provider],
+                metrics={"m": mock_metric},
+                solver=solver,
+            )
+
+
+class TestOnlineBenchmarkRunnerRunStructure:
+    """Tests for the structure of run() return value."""
+
+    def test_run_returns_dict(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """run() returns a dict."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        assert isinstance(result, dict)
+
+    def test_result_key_is_tuple_of_name_and_configuration(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Keys of result dict are (str, OnlineAlgorithmConfiguration) tuples."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        for key in result:
+            assert isinstance(key, tuple)
+            assert len(key) == 2
+            assert isinstance(key[0], str)
+            assert isinstance(key[1], OnlineAlgorithmConfiguration)
+
+    def test_result_value_is_list_of_threshold_metric_tuples(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Values of result dict are list[tuple[float, dict[str, Any]]]."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        for entries in result.values():
+            assert isinstance(entries, list)
+            for threshold, metrics_dict in entries:
+                assert isinstance(threshold, float)
+                assert isinstance(metrics_dict, dict)
+
+    def test_one_entry_per_threshold_in_result(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Each threshold produces exactly one entry in the result list."""
+        thresholds = [0.5, 1.0, 1.5]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        key = (str(single_algorithm), single_algorithm.configuration)
+        assert len(result[key]) == len(thresholds)
+
+    def test_metric_names_match_input_dict_keys(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        two_metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Metric names in result match the keys from the metrics dict."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            two_metrics,
+            solver,
+        )
+        result = runner.run()
+        for entries in result.values():
+            for _, metrics_dict in entries:
+                assert set(metrics_dict.keys()) == set(two_metrics.keys())
+
+
+class TestOnlineBenchmarkRunnerRunLogic:
+    """Tests for the logic of run() execution."""
+
+    def test_collect_runs_called_once_per_algorithm_threshold_pair(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs is called exactly once per (algorithm, threshold) pair."""
+        thresholds = [0.5, 1.0, 1.5]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        assert len(runner.collect_runs_calls) == len(thresholds)
+
+    def test_metric_evaluate_called_once_per_threshold(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """metric.evaluate() is called once per (algorithm, threshold) pair."""
+        thresholds = [0.5, 1.0]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        assert len(mock_metric.aggregate_calls) == len(thresholds)
+
+    def test_multiple_algorithms_produce_multiple_keys(
+        self,
+        two_algorithms: list[MockOnlineAlgorithm[Number]],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Two algorithms produce two distinct keys in result dict."""
+        runner = make_runner(
+            [(algo, [1.0]) for algo in two_algorithms],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        assert len(result) == 2
+
+    def test_multiple_thresholds_produce_multiple_entries(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Two thresholds produce two entries in the result list for one algorithm."""
+        thresholds = [0.5, 1.5]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        key = (str(single_algorithm), single_algorithm.configuration)
+        assert len(result[key]) == 2
+
+    def test_multiple_metrics_all_appear_in_result(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        two_metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """All metrics from input dict appear in every result entry."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [single_provider],
+            two_metrics,
+            solver,
+        )
+        result = runner.run()
+        for entries in result.values():
+            for _, metrics_dict in entries:
+                assert "metric_a" in metrics_dict
+                assert "metric_b" in metrics_dict
+
+    def test_correct_threshold_passed_to_collect_runs(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs receives exactly the threshold from the input list."""
+        thresholds = [0.5, 1.0, 2.0]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        called_thresholds = [call[1] for call in runner.collect_runs_calls]
+        assert called_thresholds == thresholds
+
+    def test_collect_runs_receives_all_providers(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        two_providers: list[MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs receives the full list of providers."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            two_providers,
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        assert runner.collect_runs_calls[0][2] == two_providers
+
+    def test_empty_providers_produces_empty_batch(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Empty providers list results in metric being called with empty runs."""
+        runner = make_runner(
+            [(single_algorithm, [1.0])],
+            [],
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        assert mock_metric.aggregate_calls[0] == []
+
+    def test_empty_thresholds_produces_no_entries(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Empty thresholds list produces empty entries list for the algorithm."""
+        runner = make_runner(
+            [(single_algorithm, [])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        key = (str(single_algorithm), single_algorithm.configuration)
+        assert result[key] == []
+
+    def test_result_preserves_threshold_order(
+        self,
+        single_algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Thresholds in result appear in the same order as in input list."""
+        thresholds = [2.0, 0.5, 1.0]
+        runner = make_runner(
+            [(single_algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        key = (str(single_algorithm), single_algorithm.configuration)
+        result_thresholds = [t for t, _ in result[key]]
+        assert result_thresholds == thresholds

From ef318da8183fa8371655a59865cdf338d1dc516c Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 02:23:08 +0300
Subject: [PATCH 10/15] feat: add ResetBenchmarkRunner

---
 .../benchmark/reset_benchmark_runner.py       |  94 +++-
 .../benchmark/test_reset_benchmark_runner.py  | 491 ++++++++++++++++++
 2 files changed, 581 insertions(+), 4 deletions(-)
 create mode 100644 tests/unit/benchmark/test_reset_benchmark_runner.py

diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py
index 87491df..47c5bd3 100644
--- a/pysatl_cpd/benchmark/reset_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py
@@ -1,8 +1,23 @@
+# -*- coding: ascii -*-
+
+"""
+Reset benchmark runner implementation.
+
+This module provides ResetBenchmarkRunner - a benchmark that runs the
+solver normally, resetting the algorithm on every detected change point.
+Results are cached via BenchmarkExecutor.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 
 from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.core.benchmark_executor import BenchmarkExecutor
 from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
 from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
 from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
@@ -13,15 +28,44 @@
 class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
     OnlineBenchmarkRunner[TraceT, ProviderT]
 ):
+    """
+    Benchmark runner that uses standard reset behaviour.
+
+    For each (algorithm, threshold) pair, runs the solver over all
+    providers via BenchmarkExecutor. The algorithm is reset on every
+    detected change point (standard solver behaviour). Results are
+    cached to disk when dump_dir is provided.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs to evaluate.
+    providers : Sequence[ProviderT]
+        Labeled data providers to run against.
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        Named metrics to evaluate for each (algorithm, threshold) batch.
+    solver : OnlineCpdSolver
+        Solver used to run algorithms against providers.
+    dump_dir : Path | str | None, optional
+        Directory for caching results via BenchmarkExecutor.
+        If None, caching is disabled. Default is None.
+    """
+
     def __init__(
         self,
         algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
         providers: Sequence[ProviderT],
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
         solver: OnlineCpdSolver,
-        dump_dir: Path | None = None,
+        dump_dir: Path | str | None = None,
     ) -> None:
-        return
+        super().__init__(
+            algorithms=algorithms,
+            providers=providers,
+            metrics=metrics,
+            solver=solver,
+            dump_dir=dump_dir,
+        )
 
     def _collect_runs(
         self,
@@ -29,4 +73,46 @@ def _collect_runs(
         threshold: float,
         providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
-        raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
+        """
+        Collect runs for a given algorithm and threshold via BenchmarkExecutor.
+
+        Creates a BenchmarkExecutor with a single threshold and all providers,
+        executes it, and pairs each resulting trace with its provider.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm to evaluate.
+        threshold : float
+            The detection threshold.
+        providers : Sequence[ProviderT]
+            Data providers to run against.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            List of (trace, provider) pairs, one per provider.
+        """
+        if not providers:
+            return []
+
+        executor: BenchmarkExecutor[Any] = BenchmarkExecutor(
+            algorithms=[(algorithm, [threshold])],
+            providers=list(providers),
+            solver=self._solver,
+            dump_dir=self._dump_dir,
+        )
+
+        records_and_traces = executor.execute()
+
+        # BenchmarkExecutor returns (BenchmarkRecord, OnlineDetectionTrace) pairs.
+        # Each trace is matched to its provider by name: record.data is looked up
+        # in the name -> provider map below, so provider names must be unique.
+        provider_by_name: dict[str, ProviderT] = {provider.name: provider for provider in providers}
+
+        runs: list[tuple[TraceT, ProviderT]] = []
+        for record, trace in records_and_traces:
+            provider = provider_by_name[record.data]
+            runs.append((cast(TraceT, trace), provider))
+
+        return runs
diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py
new file mode 100644
index 0000000..6b30443
--- /dev/null
+++ b/tests/unit/benchmark/test_reset_benchmark_runner.py
@@ -0,0 +1,491 @@
+# -*- coding: ascii -*-
+
+"""
+Unit tests for ResetBenchmarkRunner.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import csv
+from collections.abc import Sequence
+from pathlib import Path
+
+import pytest
+
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from pysatl_cpd.core.typedefs import Number
+from tests.mocks.algorithms.online import MockOnlineAlgorithm
+from tests.mocks.analysis.labeled_data import MockLabeledData
+from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric
+from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric
+from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def solver() -> OnlineCpdSolver:
+    """Default OnlineCpdSolver with no special configuration."""
+    return OnlineCpdSolver()
+
+
+@pytest.fixture
+def algorithm() -> MockOnlineAlgorithm[Number]:
+    """Algorithm that always returns 0.5 - below threshold 1.0."""
+    return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5])
+
+
+@pytest.fixture
+def algorithm_with_signal() -> MockOnlineAlgorithm[Number]:
+    """Algorithm that always returns 2.0 - above threshold 1.0."""
+    return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0])
+
+
+@pytest.fixture
+def providers() -> list[MockLabeledData]:
+    """Two labeled data providers."""
+    return [
+        MockLabeledData(change_points=[5], name="Provider1"),
+        MockLabeledData(change_points=[10], name="Provider2"),
+    ]
+
+
+@pytest.fixture
+def single_provider() -> MockLabeledData:
+    """Single labeled data provider."""
+    return MockLabeledData(change_points=[5], name="Provider1")
+
+
+@pytest.fixture
+def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]:
+    """Standard mock aggregation metric."""
+    return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](base=MockRunMetric(return_values=[1.0]))
+
+
+def make_reset_runner(
+    algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]],
+    providers: Sequence[MockLabeledData],
+    metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]],
+    solver: OnlineCpdSolver,
+    dump_dir: Path | str | None = None,
+) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]:
+    """Helper to construct ResetBenchmarkRunner with given parameters."""
+    return ResetBenchmarkRunner(
+        algorithms=algorithms,
+        providers=providers,
+        metrics=metrics,  # type: ignore[arg-type]
+        solver=solver,
+        dump_dir=dump_dir,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestResetBenchmarkRunnerInheritance:
+    """Tests for ResetBenchmarkRunner inheritance and interface."""
+
+    def test_is_instance_of_online_benchmark_runner(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """ResetBenchmarkRunner is an instance of OnlineBenchmarkRunner."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        assert isinstance(runner, OnlineBenchmarkRunner)
+
+    def test_collect_runs_is_implemented(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs does not raise NotImplementedError."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        try:
+            runner._collect_runs(algorithm, 1.0, [single_provider])
+        except NotImplementedError:
+            pytest.fail("_collect_runs raised NotImplementedError")
+
+
+class TestResetBenchmarkRunnerCollectRuns:
+    """Tests for ResetBenchmarkRunner._collect_runs."""
+
+    def test_returns_one_run_per_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        providers: list[MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs returns exactly len(providers) (trace, provider) pairs."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            providers,
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, providers)
+        assert len(runs) == len(providers)
+
+    def test_empty_providers_returns_empty_list(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs with empty providers returns empty list."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [],
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [])
+        assert runs == []
+
+    def test_single_provider_returns_single_run(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """_collect_runs with one provider returns exactly one pair."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [single_provider])
+        assert len(runs) == 1
+
+    def test_each_run_paired_with_correct_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        providers: list[MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Every provider is paired with its own trace, in input order, with none missing."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            providers,
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, providers)
+        for (_, provider), expected_provider in zip(runs, providers, strict=True):
+            assert provider is expected_provider
+
+    def test_trace_is_online_detection_trace(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Each trace in collected runs is an OnlineDetectionTrace."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [single_provider])
+        for trace, _ in runs:
+            assert isinstance(trace, OnlineDetectionTrace)
+
+    def test_trace_algorithm_name_and_configuration_hash_match_algorithm(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """algorithm_name and configuration_hash in trace match the algorithm."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [single_provider])
+        trace, _ = runs[0]
+        assert trace.algorithm_name == str(algorithm)
+        assert trace.configuration_hash == hash(algorithm.configuration)
+
+    def test_detected_change_points_respect_threshold(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """High threshold produces no detections, low threshold produces detections."""
+        runner = make_reset_runner(
+            [(algorithm_with_signal, [float("inf"), 1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runs_no_signal = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider])
+        runs_with_signal = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider])
+        trace_no_signal, _ = runs_no_signal[0]
+        trace_with_signal, _ = runs_with_signal[0]
+        assert len(trace_no_signal.detected_change_points) == 0
+        assert len(trace_with_signal.detected_change_points) > 0
+
+    def test_different_thresholds_produce_different_detections(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Lower threshold produces more detections than higher threshold."""
+        runner = make_reset_runner(
+            [(algorithm_with_signal, [1.0, float("inf")])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        runs_low = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider])
+        runs_high = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider])
+        trace_low, _ = runs_low[0]
+        trace_high, _ = runs_high[0]
+        assert len(trace_low.detected_change_points) > len(trace_high.detected_change_points)
+
+    def test_algorithm_is_reset_between_providers(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        providers: list[MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """A provider processed inside a batch yields the same trace sizes as when run alone."""
+        runner = make_reset_runner(
+            [(algorithm_with_signal, [1.0])],
+            providers,
+            {"m": mock_metric},
+            solver,
+        )
+        runs = runner._collect_runs(algorithm_with_signal, 1.0, providers)
+        # Re-run each provider alone and compare with its batch result: a leak
+        # from the preceding provider that alters the trace would show up as a
+        # size mismatch between the batch run and the isolated run.
+        assert len(runs) == len(providers)
+        for trace, provider in runs:
+            assert isinstance(trace, OnlineDetectionTrace)
+            solo_trace, _ = runner._collect_runs(algorithm_with_signal, 1.0, [provider])[0]
+            assert len(trace.detection_function) == len(solo_trace.detection_function)
+            assert len(trace.detected_change_points) == len(solo_trace.detected_change_points)
+
+
+class TestResetBenchmarkRunnerCaching:
+    """Tests for ResetBenchmarkRunner caching behaviour via BenchmarkExecutor."""
+
+    def test_no_files_created_without_dump_dir(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """Without dump_dir no files are created."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=None,
+        )
+        runner.run()
+        assert not any(tmp_path.iterdir())
+
+    def test_results_cached_to_disk_when_dump_dir_provided(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """With dump_dir a registry CSV file is created."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=tmp_path,
+        )
+        runner.run()
+        registry = tmp_path / "benchmark_registry.csv"
+        assert registry.exists()
+
+    def test_registry_contains_correct_metadata(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """Registry CSV contains correct algorithm, threshold, data entries."""
+        threshold: float = 1.0
+        runner = make_reset_runner(
+            [(algorithm, [threshold])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=tmp_path,
+        )
+        runner.run()
+        registry = tmp_path / "benchmark_registry.csv"
+        with open(registry, encoding="utf-8") as f:
+            rows = list(csv.DictReader(f))
+        assert len(rows) == 1
+        assert rows[0]["algorithm"] == str(algorithm)
+        assert float(rows[0]["threshold"]) == threshold
+        assert rows[0]["data"] == single_provider.name
+
+    def test_cached_results_reused_on_second_run(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+        tmp_path: Path,
+    ) -> None:
+        """Second run() with the same dump_dir leaves the same single pickle file in place."""
+        runner_first = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=tmp_path,
+        )
+        runner_first.run()
+        pickles_after_first = sorted(path.name for path in tmp_path.glob("*.pkl"))
+
+        runner_second = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            dump_dir=tmp_path,
+        )
+        runner_second.run()
+        # Registry is rewritten, but the cached pickle must be reused, not duplicated.
+        assert len(pickles_after_first) == 1
+        assert sorted(path.name for path in tmp_path.glob("*.pkl")) == pickles_after_first
+
+
+class TestResetBenchmarkRunnerRun:
+    """Integration tests for ResetBenchmarkRunner.run()."""
+
+    def test_run_with_single_algorithm_single_threshold_single_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Basic happy path - one algorithm, one threshold, one provider."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        assert len(result) == 1
+        entries = next(iter(result.values()))
+        assert len(entries) == 1
+        threshold, metrics_dict = entries[0]
+        assert threshold == 1.0
+        assert "m" in metrics_dict
+
+    def test_run_returns_correct_structure(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        providers: list[MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """run() result has correct nested structure."""
+        thresholds = [0.5, 1.0]
+        runner = make_reset_runner(
+            [(algorithm, thresholds)],
+            providers,
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        for key, entries in result.items():
+            assert isinstance(key[0], str)
+            assert len(entries) == len(thresholds)
+            for t, md in entries:
+                assert isinstance(t, float)
+                assert isinstance(md, dict)
+
+    def test_run_with_multiple_thresholds(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledData,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Multiple thresholds produce multiple entries in result."""
+        thresholds = [0.5, 1.0, 2.0]
+        runner = make_reset_runner(
+            [(algorithm, thresholds)],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+        )
+        result = runner.run()
+        entries = next(iter(result.values()))
+        assert len(entries) == len(thresholds)
+        result_thresholds = [t for t, _ in entries]
+        assert result_thresholds == thresholds
+
+    def test_run_with_empty_providers(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        solver: OnlineCpdSolver,
+    ) -> None:
+        """Empty providers list - metric is called with empty batch."""
+        runner = make_reset_runner(
+            [(algorithm, [1.0])],
+            [],
+            {"m": mock_metric},
+            solver,
+        )
+        runner.run()
+        assert mock_metric.aggregate_calls[0] == []

From f82c0ba4bbc828c76a68097ceaa2054848b8c542 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 02:59:32 +0300
Subject: [PATCH 11/15] feat: add NoResetBenchmarkRunner

---
 .../noreset/noreset_benchmark_runner.py       | 136 ++++-
 tests/mocks/algorithms/online/error.py        |   2 -
 tests/mocks/algorithms/online/simple.py       |   1 -
 tests/mocks/analysis/labeled_data.py          |  29 +
 .../noreset/test_noreset_benchmark_runner.py  | 536 ++++++++++++++++++
 .../benchmark/test_reset_benchmark_runner.py  | 100 ++--
 6 files changed, 743 insertions(+), 61 deletions(-)
 create mode 100644 tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py

diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
index 1b36eae..52baab3 100644
--- a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py
@@ -1,8 +1,24 @@
+# -*- coding: ascii -*-
+
+"""
+NoReset benchmark runner implementation.
+
+This module provides NoResetBenchmarkRunner - an optimised benchmark for
+series with a single change point. The solver is run with threshold=inf
+for an (algorithm, provider) pair, and every requested threshold is then
+simulated via ThresholdPolicy on that trace (cached when dump_dir is set).
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from collections.abc import Sequence
 from pathlib import Path
 from typing import Any
 
 from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.core.benchmark_executor import BenchmarkExecutor
 from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
 from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace
 from pysatl_cpd.benchmark.noreset.threshold_policy import ThresholdPolicy
@@ -13,6 +29,33 @@
 
 
 class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]):
+    """
+    Optimised benchmark runner for series with a single change point.
+
+    For each (algorithm, provider) pair the solver is run with
+    threshold=inf, producing a full detection function trace. Every
+    requested threshold is then simulated by applying a ThresholdPolicy
+    to that trace. Re-use of the inf trace across thresholds relies on
+    the BenchmarkExecutor disk cache, i.e. on dump_dir being set.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs to evaluate.
+    providers : Sequence[ProviderT]
+        Labeled data providers to run against.
+    metrics : dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]]
+        Named metrics to evaluate for each (algorithm, threshold) batch.
+    solver : OnlineCpdSolver
+        Solver used to produce inf traces.
+    policy : ThresholdPolicy
+        Policy used to extract detected change points from the inf trace
+        for each threshold.
+    dump_dir : Path | str | None, optional
+        Directory for caching inf traces via BenchmarkExecutor.
+        If None, caching is disabled. Default is None.
+    """
+
     def __init__(
         self,
         algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
@@ -20,9 +63,48 @@ def __init__(
         metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]],
         solver: OnlineCpdSolver,
         policy: ThresholdPolicy,
-        dump_dir: Path | None = None,
+        dump_dir: Path | str | None = None,
     ) -> None:
-        return
+        super().__init__(
+            algorithms=algorithms,
+            providers=providers,
+            metrics=metrics,
+            solver=solver,
+            dump_dir=dump_dir,
+        )
+        self._policy = policy
+
+    def _get_inf_trace(
+        self,
+        algorithm: OnlineAlgorithm[Any, Any, Any],
+        provider: ProviderT,
+    ) -> OnlineDetectionTrace[Any]:
+        """
+        Return the trace of ``algorithm`` on ``provider`` at threshold=inf.
+
+        A one-off BenchmarkExecutor is built for this single pair, so any
+        caching that happens is the executor's dump_dir-based caching.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            Algorithm whose detection function is recorded.
+        provider : ProviderT
+            Data provider the algorithm is run on.
+
+        Returns
+        -------
+        OnlineDetectionTrace[Any]
+            Trace of the single (algorithm, inf, provider) run.
+        """
+        inf_threshold = float("inf")
+        one_shot: BenchmarkExecutor[Any] = BenchmarkExecutor(
+            algorithms=[(algorithm, [inf_threshold])],
+            providers=[provider],
+            solver=self._solver,
+            dump_dir=self._dump_dir,
+        )
+        return one_shot.execute()[0][1]
 
     def _collect_runs(
         self,
@@ -30,11 +112,47 @@ def _collect_runs(
         threshold: float,
         providers: Sequence[ProviderT],
     ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]:
-        raise NotImplementedError("Method '_collect_runs' is not implemented yet.")
+        """
+        Collect NoReset runs for a given algorithm and threshold.
 
-    def _get_inf_trace(
-        self,
-        algorithm: OnlineAlgorithm[Any, Any, Any],
-        provider: ProviderT,
-    ) -> OnlineDetectionTrace[Any]:
-        raise NotImplementedError("Method '_get_inf_trace' is not implemented yet.")
+        For each provider, retrieves the inf trace via BenchmarkExecutor
+        and applies the ThresholdPolicy to produce a lightweight
+        NoResetDetectionTrace.
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            The algorithm to evaluate.
+        threshold : float
+            The detection threshold to simulate.
+        providers : Sequence[ProviderT]
+            Data providers to run against.
+
+        Returns
+        -------
+        list[tuple[NoResetDetectionTrace[Any], ProviderT]]
+            List of (noreset_trace, provider) pairs, one per provider.
+        """
+        if not providers:
+            return []
+
+        runs: list[tuple[NoResetDetectionTrace[Any], ProviderT]] = []
+
+        for provider in providers:
+            inf_trace = self._get_inf_trace(algorithm, provider)
+
+            detected_change_points: list[int] = self._policy.apply(
+                inf_trace.detection_function,
+                threshold,
+                provider.change_points,
+            )
+
+            noreset_trace = NoResetDetectionTrace.from_inf_trace(
+                source_trace=inf_trace,
+                detected_change_points=detected_change_points,
+                threshold=threshold,
+            )
+
+            runs.append((noreset_trace, provider))
+
+        return runs
diff --git a/tests/mocks/algorithms/online/error.py b/tests/mocks/algorithms/online/error.py
index 09d7f6a..9dac6a8 100644
--- a/tests/mocks/algorithms/online/error.py
+++ b/tests/mocks/algorithms/online/error.py
@@ -168,7 +168,5 @@ def __repr__(self) -> str:
         return (
             f"{self.__class__.__name__}("
             f"name={self._name!r}, "
-            f"error_on_call={self._error_on_call}, "
-            f"learning_period_size={self._config.learning_period_size}, "
-            f"process_count={self._process_count})"
+            f"learning_period_size={self._config.learning_period_size})"  # closes the "(" opened above
         )
diff --git a/tests/mocks/algorithms/online/simple.py b/tests/mocks/algorithms/online/simple.py
index b3f432d..1b0b7d3 100644
--- a/tests/mocks/algorithms/online/simple.py
+++ b/tests/mocks/algorithms/online/simple.py
@@ -155,5 +155,4 @@ def __repr__(self) -> str:
             f"{self.__class__.__name__}("
             f"name={self._name!r}, "
-            f"learning_period_size={self._config.learning_period_size}, "
-            f"process_count={self._process_count})"
+            f"learning_period_size={self._config.learning_period_size})"  # closes the "(" opened above
         )
diff --git a/tests/mocks/analysis/labeled_data.py b/tests/mocks/analysis/labeled_data.py
index 13eae01..012e787 100644
--- a/tests/mocks/analysis/labeled_data.py
+++ b/tests/mocks/analysis/labeled_data.py
@@ -35,3 +35,32 @@ def __init__(self, change_points: Sequence[int], name: str = "MockLabeledData"):
         max_idx = max(change_points) if change_points else 0
         dummy_raw_data = [0.0] * max_idx
         super().__init__(raw_data=dummy_raw_data, change_points=change_points, name=name)
+
+
+class MockLabeledDataWithPadding(LabeledData[float]):
+    """
+    Mock LabeledData whose raw series is longer than its last change point.
+
+    MockLabeledData builds exactly max(change_points) zero observations;
+    this variant appends ``padding`` more, so with a positive padding the
+    series does not end at the last change point.
+
+    Parameters
+    ----------
+    change_points : Sequence[int]
+        Known change point indices (1-based, must be positive).
+    padding : int, default=10
+        Number of zero observations added on top of max(change_points).
+    name : str, default="MockLabeledDataWithPadding"
+        Dataset identifier passed through to LabeledData.
+    """
+
+    def __init__(
+        self,
+        change_points: Sequence[int],
+        padding: int = 10,
+        name: str = "MockLabeledDataWithPadding",
+    ) -> None:
+        series_length = max(change_points, default=0) + padding
+        zeros = [0.0] * series_length
+        super().__init__(raw_data=zeros, change_points=change_points, name=name)
diff --git a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py
new file mode 100644
index 0000000..461b146
--- /dev/null
+++ b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py
@@ -0,0 +1,536 @@
+# -*- coding: ascii -*-
+
+"""
+Unit tests for NoResetBenchmarkRunner.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from collections.abc import Sequence
+from pathlib import Path
+
+import pytest
+
+from pysatl_cpd.benchmark.noreset.noreset_benchmark_runner import NoResetBenchmarkRunner
+from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace
+from pysatl_cpd.benchmark.noreset.threshold_policy import EventBasedPolicy, PointBasedPolicy
+from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.typedefs import Number
+from tests.mocks.algorithms.online import MockOnlineAlgorithm
+from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding
+from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric
+from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric
+from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def solver() -> OnlineCpdSolver:
+    """Default OnlineCpdSolver with no special configuration."""
+    return OnlineCpdSolver()
+
+
+@pytest.fixture
+def algorithm() -> MockOnlineAlgorithm[Number]:
+    """Algorithm that always returns 0.5 - below threshold 1.0."""
+    return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5], learning_period_size=2)
+
+
+@pytest.fixture
+def algorithm_with_signal() -> MockOnlineAlgorithm[Number]:
+    """Algorithm that always returns 2.0 - above threshold 1.0."""
+    return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0], learning_period_size=2)
+
+
+@pytest.fixture
+def single_provider() -> MockLabeledDataWithPadding:
+    """Single labeled data provider with one change point."""
+    return MockLabeledDataWithPadding(change_points=[5], name="Provider1")
+
+
+@pytest.fixture
+def two_providers() -> list[MockLabeledDataWithPadding]:
+    """Two labeled data providers."""
+    return [
+        MockLabeledDataWithPadding(change_points=[5], name="Provider1"),
+        MockLabeledDataWithPadding(change_points=[10], name="Provider2"),
+    ]
+
+
+@pytest.fixture
+def point_policy() -> PointBasedPolicy:
+    """PointBasedPolicy with strict=True."""
+    return PointBasedPolicy(strict=True)
+
+
+@pytest.fixture
+def event_policy() -> EventBasedPolicy:
+    """EventBasedPolicy with max_delay=5."""
+    return EventBasedPolicy(max_delay=5)
+
+
+@pytest.fixture
+def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]:
+    """Standard mock aggregation metric."""
+    return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding](
+        base=MockRunMetric(return_values=[1.0])
+    )
+
+
+def make_noreset_runner(
+    algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]],
+    providers: Sequence[MockLabeledDataWithPadding],
+    metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]],
+    solver: OnlineCpdSolver,
+    policy: PointBasedPolicy | EventBasedPolicy,
+    dump_dir: Path | str | None = None,
+) -> NoResetBenchmarkRunner[MockLabeledDataWithPadding]:
+    """Build a NoResetBenchmarkRunner from mock algorithms, providers and metrics."""
+    return NoResetBenchmarkRunner(
+        algorithms=algorithms,
+        providers=providers,
+        metrics=metrics,  # type: ignore[arg-type]  # presumably: mocks are typed for MockOnlineDetectionTrace
+        solver=solver,
+        policy=policy,
+        dump_dir=dump_dir,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestNoResetBenchmarkRunnerInheritance:
+    """Tests for NoResetBenchmarkRunner inheritance and interface."""
+
+    def test_is_instance_of_online_benchmark_runner(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """NoResetBenchmarkRunner is an instance of OnlineBenchmarkRunner."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        assert isinstance(runner, OnlineBenchmarkRunner)
+
+    def test_collect_runs_is_implemented(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """_collect_runs does not raise NotImplementedError."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        try:
+            runner._collect_runs(algorithm, 1.0, [single_provider])
+        except NotImplementedError:
+            pytest.fail("_collect_runs raised NotImplementedError")
+
+
+class TestNoResetBenchmarkRunnerInfTrace:
+    """Tests for NoResetBenchmarkRunner._get_inf_trace."""
+
+    def test_inf_trace_has_no_detected_change_points(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Inf trace produced with threshold=inf has no detected change points."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        inf_trace = runner._get_inf_trace(algorithm, single_provider)
+        assert len(inf_trace.detected_change_points) == 0
+
+    def test_inf_trace_detection_function_has_correct_length(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Detection function length equals the number of observations in provider."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        inf_trace = runner._get_inf_trace(algorithm, single_provider)
+        assert len(inf_trace.detection_function) == len(single_provider)
+
+    def test_inf_trace_algorithm_name_matches(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """algorithm_name in inf trace matches str(algorithm)."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        inf_trace = runner._get_inf_trace(algorithm, single_provider)
+        assert inf_trace.algorithm_name == str(algorithm)
+
+
+class TestNoResetBenchmarkRunnerCollectRuns:
+    """Tests for NoResetBenchmarkRunner._collect_runs."""
+
+    def test_returns_one_run_per_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        two_providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """_collect_runs returns exactly len(providers) (trace, provider) pairs."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            two_providers,
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, two_providers)
+        assert len(runs) == len(two_providers)
+
+    def test_empty_providers_returns_empty_list(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """_collect_runs with empty providers returns empty list."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [])
+        assert runs == []
+
+    def test_each_run_is_noreset_detection_trace(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Each trace in collected runs is a NoResetDetectionTrace."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, [single_provider])
+        for trace, _ in runs:
+            assert isinstance(trace, NoResetDetectionTrace)
+
+    def test_each_run_paired_with_correct_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        two_providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Runs come back in provider order, exactly one run per provider object."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            two_providers,
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm, 1.0, two_providers)
+        for (_, provider), expected in zip(runs, two_providers, strict=True):  # fail on a missing run
+            assert provider is expected
+
+    def test_high_threshold_produces_no_detections(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """High threshold (inf) produces no detected change points."""
+        runner = make_noreset_runner(
+            [(algorithm_with_signal, [float("inf")])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider])
+        trace, _ = runs[0]
+        assert len(trace.detected_change_points) == 0
+
+    def test_low_threshold_produces_detections(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Low threshold (0.0) with signal algorithm produces detections."""
+        runner = make_noreset_runner(
+            [(algorithm_with_signal, [0.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runs = runner._collect_runs(algorithm_with_signal, 0.0, [single_provider])
+        trace, _ = runs[0]
+        assert len(trace.detected_change_points) > 0
+
+    def test_policy_is_applied_to_inf_trace(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Detected change points match what policy.apply() would return."""
+        runner = make_noreset_runner(
+            [(algorithm_with_signal, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        inf_trace = runner._get_inf_trace(algorithm_with_signal, single_provider)
+        expected_cps = point_policy.apply(
+            inf_trace.detection_function,
+            1.0,
+            single_provider.change_points,
+        )
+        runs = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider])
+        trace, _ = runs[0]
+        assert list(trace.detected_change_points) == expected_cps
+
+
+class TestNoResetBenchmarkRunnerRun:
+    """Integration tests for NoResetBenchmarkRunner.run()."""
+
+    def test_run_with_single_algorithm_single_threshold_single_provider(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Basic happy path - one algorithm, one threshold, one provider."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        result = runner.run()
+        assert len(result) == 1
+        entries = next(iter(result.values()))
+        assert len(entries) == 1
+        threshold, metrics_dict = entries[0]
+        assert threshold == 1.0
+        assert "m" in metrics_dict
+
+    def test_run_with_multiple_thresholds_single_solver_execution(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+        tmp_path: Path,
+    ) -> None:
+        """Three thresholds with dump_dir set leave exactly one cached .pkl trace."""
+        runner = make_noreset_runner(
+            [(algorithm_with_signal, [0.5, 1.0, 2.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+            dump_dir=tmp_path,
+        )
+        runner.run()
+        pkl_files = list(tmp_path.glob("*.pkl"))
+        assert len(pkl_files) == 1  # one file = one inf trace; solver call count is not measured here
+
+    def test_run_returns_correct_structure(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        two_providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """run() result has correct nested structure."""
+        thresholds = [0.5, 1.0]
+        runner = make_noreset_runner(
+            [(algorithm, thresholds)],
+            two_providers,
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        result = runner.run()
+        for key, entries in result.items():
+            assert isinstance(key[0], str)
+            assert len(entries) == len(thresholds)
+            for t, md in entries:
+                assert isinstance(t, float)
+                assert isinstance(md, dict)
+
+    def test_run_with_empty_providers(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+    ) -> None:
+        """Empty providers list - metric is called with empty batch."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runner.run()
+        assert mock_metric.aggregate_calls[0] == []
+
+    def test_different_policies_produce_different_detections(
+        self,
+        algorithm_with_signal: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+        event_policy: EventBasedPolicy,
+    ) -> None:
+        """Both PointBasedPolicy and EventBasedPolicy runners yield NoResetDetectionTrace runs."""
+        runner_point = make_noreset_runner(
+            [(algorithm_with_signal, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+        )
+        runner_event = make_noreset_runner(
+            [(algorithm_with_signal, [1.0])],
+            [single_provider],
+            {
+                "m": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding](
+                    base=MockRunMetric(return_values=[1.0])
+                )
+            },
+            solver,
+            event_policy,
+        )
+        runs_point = runner_point._collect_runs(algorithm_with_signal, 1.0, [single_provider])
+        runs_event = runner_event._collect_runs(algorithm_with_signal, 1.0, [single_provider])
+        trace_point, _ = runs_point[0]
+        trace_event, _ = runs_event[0]
+        # Detections are NOT compared here; only the trace type of each run is checked.
+        assert isinstance(trace_point, NoResetDetectionTrace)
+        assert isinstance(trace_event, NoResetDetectionTrace)
+
+
+class TestNoResetBenchmarkRunnerCaching:
+    """Tests for NoResetBenchmarkRunner caching behaviour."""
+
+    def test_no_files_created_without_dump_dir(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+        tmp_path: Path,
+    ) -> None:
+        """With dump_dir=None the pytest tmp_path directory is still empty after run()."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+            dump_dir=None,
+        )
+        runner.run()
+        assert not any(tmp_path.iterdir())  # NOTE(review): runner never receives tmp_path - confirm intent
+
+    def test_inf_trace_cached_to_disk_when_dump_dir_provided(
+        self,
+        algorithm: MockOnlineAlgorithm[Number],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
+        solver: OnlineCpdSolver,
+        point_policy: PointBasedPolicy,
+        tmp_path: Path,
+    ) -> None:
+        """With dump_dir, inf trace registry and pickle are created."""
+        runner = make_noreset_runner(
+            [(algorithm, [1.0])],
+            [single_provider],
+            {"m": mock_metric},
+            solver,
+            point_policy,
+            dump_dir=tmp_path,
+        )
+        runner.run()
+        registry = tmp_path / "benchmark_registry.csv"
+        pkl_files = list(tmp_path.glob("*.pkl"))
+        assert registry.exists()
+        assert len(pkl_files) == 1
diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py
index 6b30443..7cd6a61 100644
--- a/tests/unit/benchmark/test_reset_benchmark_runner.py
+++ b/tests/unit/benchmark/test_reset_benchmark_runner.py
@@ -20,7 +20,7 @@
 from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
 from pysatl_cpd.core.typedefs import Number
 from tests.mocks.algorithms.online import MockOnlineAlgorithm
-from tests.mocks.analysis.labeled_data import MockLabeledData
+from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding
 from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric
 from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric
 from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
@@ -39,43 +39,45 @@ def solver() -> OnlineCpdSolver:
 @pytest.fixture
 def algorithm() -> MockOnlineAlgorithm[Number]:
     """Algorithm that always returns 0.5 - below threshold 1.0."""
-    return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5])
+    return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5], learning_period_size=2)
 
 
 @pytest.fixture
 def algorithm_with_signal() -> MockOnlineAlgorithm[Number]:
     """Algorithm that always returns 2.0 - above threshold 1.0."""
-    return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0])
+    return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0], learning_period_size=2)
 
 
 @pytest.fixture
-def providers() -> list[MockLabeledData]:
+def providers() -> list[MockLabeledDataWithPadding]:
     """Two labeled data providers."""
     return [
-        MockLabeledData(change_points=[5], name="Provider1"),
-        MockLabeledData(change_points=[10], name="Provider2"),
+        MockLabeledDataWithPadding(change_points=[5], name="Provider1"),
+        MockLabeledDataWithPadding(change_points=[10], name="Provider2"),
     ]
 
 
 @pytest.fixture
-def single_provider() -> MockLabeledData:
+def single_provider() -> MockLabeledDataWithPadding:
     """Single labeled data provider."""
-    return MockLabeledData(change_points=[5], name="Provider1")
+    return MockLabeledDataWithPadding(change_points=[5], name="Provider1")
 
 
 @pytest.fixture
-def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]:
+def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]:
     """Standard mock aggregation metric."""
-    return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](base=MockRunMetric(return_values=[1.0]))
+    return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding](
+        base=MockRunMetric(return_values=[1.0])
+    )
 
 
 def make_reset_runner(
     algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]],
-    providers: Sequence[MockLabeledData],
-    metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]],
+    providers: Sequence[MockLabeledDataWithPadding],
+    metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]],
     solver: OnlineCpdSolver,
     dump_dir: Path | str | None = None,
-) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]:
+) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledDataWithPadding]:
     """Helper to construct ResetBenchmarkRunner with given parameters."""
     return ResetBenchmarkRunner(
         algorithms=algorithms,
@@ -97,8 +99,8 @@ class TestResetBenchmarkRunnerInheritance:
     def test_is_instance_of_online_benchmark_runner(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """ResetBenchmarkRunner is an instance of OnlineBenchmarkRunner."""
@@ -113,8 +115,8 @@ def test_is_instance_of_online_benchmark_runner(
     def test_collect_runs_is_implemented(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """_collect_runs does not raise NotImplementedError."""
@@ -136,8 +138,8 @@ class TestResetBenchmarkRunnerCollectRuns:
     def test_returns_one_run_per_provider(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        providers: list[MockLabeledData],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """_collect_runs returns exactly len(providers) (trace, provider) pairs."""
@@ -153,7 +155,7 @@ def test_returns_one_run_per_provider(
     def test_empty_providers_returns_empty_list(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """_collect_runs with empty providers returns empty list."""
@@ -169,8 +171,8 @@ def test_empty_providers_returns_empty_list(
     def test_single_provider_returns_single_run(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """_collect_runs with one provider returns exactly one pair."""
@@ -186,8 +188,8 @@ def test_single_provider_returns_single_run(
     def test_each_run_paired_with_correct_provider(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        providers: list[MockLabeledData],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Each trace is paired with its corresponding provider."""
@@ -204,8 +206,8 @@ def test_each_run_paired_with_correct_provider(
     def test_trace_is_online_detection_trace(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Each trace in collected runs is an OnlineDetectionTrace."""
@@ -222,8 +224,8 @@ def test_trace_is_online_detection_trace(
     def test_trace_algorithm_name_and_configuration_hash_match_algorithm(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """algorithm_name and configuration_hash in trace match the algorithm."""
@@ -241,8 +243,8 @@ def test_trace_algorithm_name_and_configuration_hash_match_algorithm(
     def test_detected_change_points_respect_threshold(
         self,
         algorithm_with_signal: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """High threshold produces no detections, low threshold produces detections."""
@@ -262,8 +264,8 @@ def test_detected_change_points_respect_threshold(
     def test_different_thresholds_produce_different_detections(
         self,
         algorithm_with_signal: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Lower threshold produces more detections than higher threshold."""
@@ -282,8 +284,8 @@ def test_different_thresholds_produce_different_detections(
     def test_algorithm_is_reset_between_providers(
         self,
         algorithm_with_signal: MockOnlineAlgorithm[Number],
-        providers: list[MockLabeledData],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Algorithm state is reset between providers by the solver."""
@@ -311,8 +313,8 @@ class TestResetBenchmarkRunnerCaching:
     def test_no_files_created_without_dump_dir(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
         tmp_path: Path,
     ) -> None:
@@ -330,8 +332,8 @@ def test_no_files_created_without_dump_dir(
     def test_results_cached_to_disk_when_dump_dir_provided(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
         tmp_path: Path,
     ) -> None:
@@ -350,8 +352,8 @@ def test_results_cached_to_disk_when_dump_dir_provided(
     def test_registry_contains_correct_metadata(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
         tmp_path: Path,
     ) -> None:
@@ -376,8 +378,8 @@ def test_registry_contains_correct_metadata(
     def test_cached_results_reused_on_second_run(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
         tmp_path: Path,
     ) -> None:
@@ -411,8 +413,8 @@ class TestResetBenchmarkRunnerRun:
     def test_run_with_single_algorithm_single_threshold_single_provider(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Basic happy path - one algorithm, one threshold, one provider."""
@@ -433,8 +435,8 @@ def test_run_with_single_algorithm_single_threshold_single_provider(
     def test_run_returns_correct_structure(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        providers: list[MockLabeledData],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        providers: list[MockLabeledDataWithPadding],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """run() result has correct nested structure."""
@@ -456,8 +458,8 @@ def test_run_returns_correct_structure(
     def test_run_with_multiple_thresholds(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        single_provider: MockLabeledData,
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        single_provider: MockLabeledDataWithPadding,
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Multiple thresholds produce multiple entries in result."""
@@ -477,7 +479,7 @@ def test_run_with_multiple_thresholds(
     def test_run_with_empty_providers(
         self,
         algorithm: MockOnlineAlgorithm[Number],
-        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData],
+        mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding],
         solver: OnlineCpdSolver,
     ) -> None:
         """Empty providers list - metric is called with empty batch."""

From 6b42de16b6fd03527e61cb01412a21fd281c171c Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 03:53:13 +0300
Subject: [PATCH 12/15] fix: add mode argument to ARLBenchmarkRunner

---
 pysatl_cpd/benchmark/arl_benchmark_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
index d14069f..64c800b 100644
--- a/pysatl_cpd/benchmark/arl_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -1,6 +1,6 @@
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 from pysatl_cpd.analysis.labeled_data import LabeledData
 from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
@@ -17,6 +17,7 @@ def __init__(
         algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
         providers: list[ProviderT],
         solver: OnlineCpdSolver,
+        mode: Literal["reset", "noreset"],
         dump_dir: Path | None = None,
     ) -> None:
         return

From cea88f6d4ee2d4c67da1a29fc84a796736b72abc Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 04:34:43 +0300
Subject: [PATCH 13/15] feat: add ARLBenchmarkRunner

---
 pysatl_cpd/benchmark/arl_benchmark_runner.py  | 117 ++-
 .../benchmark/test_arl_benchmark_runner.py    | 778 ++++++++++++++++++
 2 files changed, 892 insertions(+), 3 deletions(-)
 create mode 100644 tests/unit/benchmark/test_arl_benchmark_runner.py

diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py
index 64c800b..f3bdeac 100644
--- a/pysatl_cpd/benchmark/arl_benchmark_runner.py
+++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py
@@ -1,9 +1,27 @@
+# -*- coding: ascii -*-
+
+"""
+Average Run Length (ARL) benchmark runner.
+
+This module provides the ARLBenchmarkRunner which evaluates the distance
+between consecutive false alarms. It automatically applies the ARLMetric
+and ensures that the provided datasets do not contain any true change points.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.metrics.online.arl_metric import ARLMetric
+from pysatl_cpd.benchmark.noreset.noreset_benchmark_runner import NoResetBenchmarkRunner
+from pysatl_cpd.benchmark.noreset.threshold_policy import PointBasedPolicy
 from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner
+from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner
 from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm
 from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
 from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
@@ -12,6 +30,40 @@
 class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](
     OnlineBenchmarkRunner[TraceT, ProviderT]
 ):
+    """
+    Benchmark runner specialized for Average Run Length (ARL) evaluation.
+
+    ARL represents the mean distance between consecutive detections (false alarms)
+    when no true change points are present in the data. This runner strictly
+    validates that all providers have empty `change_points`.
+
+    It supports two modes:
+    - "reset": The algorithm state is reset after every detection (standard behavior).
+    - "noreset": The algorithm state is not reset. A single infinite-threshold run
+      is cached, and signals are extracted using a strict point-based policy.
+
+    Parameters
+    ----------
+    algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]]
+        Sequence of (algorithm, thresholds) pairs to evaluate.
+    providers : list[ProviderT]
+        Labeled data providers to run against. Must have `change_points == []`.
+    solver : OnlineCpdSolver
+        Solver used to run algorithms against providers.
+    mode : Literal["reset", "noreset"]
+        Evaluation mode determining whether the algorithm resets after a detection.
+    dump_dir : Path | None, optional
+        Directory for caching results via BenchmarkExecutor.
+        If None, caching is disabled. Default is None.
+
+    Raises
+    ------
+    ValueError
+        If any provider contains non-empty `change_points`.
+    ValueError
+        If `mode` is neither "reset" nor "noreset".
+    """
+
     def __init__(
         self,
         algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]],
@@ -20,7 +72,45 @@ def __init__(
         mode: Literal["reset", "noreset"],
         dump_dir: Path | None = None,
     ) -> None:
-        return
+        for provider in providers:
+            if provider.change_points:
+                raise ValueError(
+                    "ARL benchmark requires empty change_points, "
+                    f"but provider '{provider.name}' has {list(provider.change_points)}."
+                )
+        # Reject an unknown mode up front, before any runner state is built.
+        if mode not in ("reset", "noreset"):
+            raise ValueError(f"Invalid mode: {mode}. Must be 'reset' or 'noreset'.")
+
+        metrics = {"arl": ARLMetric[TraceT, ProviderT]()}
+        super().__init__(
+            algorithms=algorithms,
+            providers=providers,
+            metrics=metrics,  # type: ignore[arg-type]
+            solver=solver,
+            dump_dir=dump_dir,
+        )
+
+        self._mode = mode
+        if mode == "reset":
+            # Delegate run collection to the standard ResetBenchmarkRunner.
+            self._inner_runner: OnlineBenchmarkRunner[Any, ProviderT] = ResetBenchmarkRunner(
+                algorithms=algorithms,
+                providers=providers,
+                metrics=cast(Any, metrics),
+                solver=solver,
+                dump_dir=dump_dir,
+            )
+        else:
+            # mode == "noreset": delegate to NoResetBenchmarkRunner with a strict point-based policy.
+            self._inner_runner = NoResetBenchmarkRunner(
+                algorithms=algorithms,
+                providers=providers,
+                metrics=cast(Any, metrics),
+                solver=solver,
+                policy=PointBasedPolicy(strict=True),
+                dump_dir=dump_dir,
+            )
 
     def _collect_runs(
         self,
@@ -28,4 +118,25 @@ def _collect_runs(
         threshold: float,
         providers: Sequence[ProviderT],
     ) -> list[tuple[TraceT, ProviderT]]:
-        raise NotImplementedError("Method `_collect_runs` is not implemented yet.")
+        """
+        Produce the (trace, provider) batch for one algorithm/threshold pair.
+
+        The work is forwarded to the inner runner chosen at construction time
+        (ResetBenchmarkRunner or NoResetBenchmarkRunner, according to the mode).
+
+        Parameters
+        ----------
+        algorithm : OnlineAlgorithm[Any, Any, Any]
+            Algorithm under evaluation.
+        threshold : float
+            Detection threshold passed to the inner runner.
+        providers : Sequence[ProviderT]
+            Data providers the algorithm is run against.
+
+        Returns
+        -------
+        list[tuple[TraceT, ProviderT]]
+            The inner runner's (trace, provider) pairs, cast to this runner's types.
+        """
+        inner = self._inner_runner
+        return cast(list[tuple[TraceT, ProviderT]], inner._collect_runs(algorithm, threshold, providers))
diff --git a/tests/unit/benchmark/test_arl_benchmark_runner.py b/tests/unit/benchmark/test_arl_benchmark_runner.py
new file mode 100644
index 0000000..8a21ba8
--- /dev/null
+++ b/tests/unit/benchmark/test_arl_benchmark_runner.py
@@ -0,0 +1,778 @@
+# -*- coding: ascii -*-
+"""
+Tests for ARLBenchmarkRunner.
+
+Covers initialization validation, _collect_runs behavior, run() output
+structure and exact ARL values, max_runlength interaction, reset vs
+noreset mode semantics, and reset behavior verification.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import math
+from typing import Any, Literal
+
+import pytest
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.arl_benchmark_runner import ARLBenchmarkRunner
+from pysatl_cpd.benchmark.metrics.online.arl_metric import ARLMetric
+from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm
+
+
+def _make_provider(
+    length: int,
+    change_points: list[int] | None = None,
+    name: str = "test_data",
+) -> LabeledData[float]:
+    """Build a constant-valued LabeledData provider for the tests.
+
+    Parameters
+    ----------
+    length : int
+        How many observations the raw data holds.
+    change_points : list[int] | None
+        Change point indices to attach; None means an empty list.
+    name : str
+        Name given to the provider.
+
+    Returns
+    -------
+    LabeledData[float]
+        Provider whose raw data is `length` copies of 1.0.
+    """
+    labels: list[int] = [] if change_points is None else change_points
+    return LabeledData(raw_data=[1.0 for _ in range(length)], change_points=labels, name=name)
+
+
+# ---------------------------------------------------------------------------
+# 1. Initialization and validation
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerInit:
+    """Tests for ARLBenchmarkRunner.__init__: provider validation, metrics and mode."""
+
+    def test_raises_if_provider_has_change_points(self) -> None:
+        """A single provider with non-empty change_points is rejected and named in the error."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(10, change_points=[5], name="bad")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        with pytest.raises(ValueError, match="requires empty change_points, but provider 'bad'"):
+            ARLBenchmarkRunner(
+                algorithms=[(algorithm, [1.0])],
+                providers=[provider],
+                solver=solver,
+                mode="reset",
+            )
+
+    def test_raises_if_any_provider_has_change_points(self) -> None:
+        """One offending provider among several is enough to raise, and it is the one reported."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        ok_provider: LabeledData[float] = _make_provider(10, name="ok")
+        bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        with pytest.raises(ValueError, match="requires empty change_points, but provider 'bad'"):
+            ARLBenchmarkRunner(
+                algorithms=[(algorithm, [1.0])],
+                providers=[ok_provider, bad_provider],
+                solver=solver,
+                mode="reset",
+            )
+
+    def test_raises_if_any_provider_has_change_points_noreset_mode(self) -> None:
+        """The change_points validation applies in noreset mode as well."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        with pytest.raises(ValueError, match="requires empty change_points, but provider 'bad'"):
+            ARLBenchmarkRunner(
+                algorithms=[(algorithm, [1.0])],
+                providers=[bad_provider],
+                solver=solver,
+                mode="noreset",
+            )
+
+    def test_valid_init_with_empty_change_points(self) -> None:
+        """Construction succeeds when all providers have empty change_points."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(10, name="clean")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [1.0])],
+            providers=[provider],
+            solver=solver,
+            mode="reset",
+        )
+        assert runner._mode == "reset"
+
+    def test_metrics_contain_arl_metric(self) -> None:
+        """Internal _metrics dict should contain 'arl' key with ARLMetric instance."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(10, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [1.0])],
+            providers=[provider],
+            solver=solver,
+            mode="reset",
+        )
+        assert "arl" in runner._metrics
+        assert isinstance(runner._metrics["arl"], ARLMetric)
+
+    @pytest.mark.parametrize("mode", ["reset", "noreset"])
+    def test_accepts_both_modes(self, mode: Literal["reset", "noreset"]) -> None:
+        """Constructor accepts both 'reset' and 'noreset' and stores the chosen mode."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="d")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [1.0])],
+            providers=[provider],
+            solver=solver,
+            mode=mode,
+        )
+        assert runner._mode == mode
+
+
+# ---------------------------------------------------------------------------
+# 2. _collect_runs
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerCollectRuns:
+    """Checks of the (trace, provider) batches produced by _collect_runs."""
+
+    def test_returns_correct_number_of_pairs_reset(self) -> None:
+        """In reset mode the batch holds one (trace, provider) pair per provider."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        datasets: list[LabeledData[float]] = [
+            _make_provider(10, name="d1"),
+            _make_provider(10, name="d2"),
+            _make_provider(10, name="d3"),
+        ]
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algo, [1.0])],
+            providers=datasets,
+            solver=cpd_solver,
+            mode="reset",
+        )
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
+            algo, 1.0, datasets
+        )
+        assert len(batch) == 3
+
+    def test_returns_correct_number_of_pairs_noreset(self) -> None:
+        """In noreset mode the batch holds one (trace, provider) pair per provider."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        datasets: list[LabeledData[float]] = [
+            _make_provider(10, name="d1"),
+            _make_provider(10, name="d2"),
+        ]
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algo, [1.0])],
+            providers=datasets,
+            solver=cpd_solver,
+            mode="noreset",
+        )
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
+            algo, 1.0, datasets
+        )
+        assert len(batch) == 2
+
+    def test_pairs_traces_with_correct_providers(self) -> None:
+        """Provider names in the batch follow the order of the input providers."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        datasets: list[LabeledData[float]] = [
+            _make_provider(10, name="alpha"),
+            _make_provider(15, name="beta"),
+        ]
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algo, [1.0])],
+            providers=datasets,
+            solver=cpd_solver,
+            mode="reset",
+        )
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
+            algo, 1.0, datasets
+        )
+        paired_names: list[str] = [source.name for _trace, source in batch]
+        assert paired_names == ["alpha", "beta"]
+
+    def test_empty_providers_returns_empty_list(self) -> None:
+        """Passing an empty providers sequence to _collect_runs gives an empty list."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algo, [1.0])],
+            providers=[_make_provider(10)],
+            solver=cpd_solver,
+            mode="reset",
+        )
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(algo, 1.0, [])
+        assert batch == []
+
+
+# ---------------------------------------------------------------------------
+# 3. run() - structure and values
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerRun:
+    """Tests for run() output structure and ARL values."""
+
+    def test_run_returns_correct_key_structure(self) -> None:
+        """Result key should be (str(algorithm), algorithm.configuration)."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="KeyAlgo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(10)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [1.0])],
+            providers=[provider],
+            solver=solver,
+            mode="reset",
+        )
+        results: dict[
+            tuple[str, OnlineAlgorithmConfiguration],
+            list[tuple[float, dict[str, Any]]],
+        ] = runner.run()
+
+        assert len(results) == 1
+        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
+        assert key[0] == str(algorithm)
+        assert key[1] == algorithm.configuration
+
+    def test_run_arl_infinity_when_no_detections(self) -> None:
+        """ARL should be inf when the detection function never exceeds the threshold."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5])
+        provider: LabeledData[float] = _make_provider(20)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [3.0])],
+            providers=[provider],
+            solver=solver,
+            mode="reset",
+        )
+        results = runner.run()
+        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
+        _, metrics = results[key][0]
+        arl_value: float = metrics["arl"]
+
+        assert math.isinf(arl_value)
+
+    def test_run_arl_infinity_noreset_when_no_detections(self) -> None:
+        """ARL should be inf in noreset mode when no threshold crossing occurs."""
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5])
+        provider: LabeledData[float] = _make_provider(20)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [3.0])],
+            providers=[provider],
+            solver=solver,
+            mode="noreset",
+        )
+        results = runner.run()
+        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
+        _, metrics = results[key][0]
+        arl_value: float = metrics["arl"]
+
+        assert math.isinf(arl_value)
+
+    def test_run_multiple_thresholds(self) -> None:
+        """Every requested threshold yields its own result entry carrying an 'arl' metric."""
+        grid: list[float] = [1.0, 3.0, 10.0]
+        multi_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Multi", return_sequence=[0.0, 2.0, 5.0])
+        data: LabeledData[float] = _make_provider(20)
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(multi_algo, grid)],
+            providers=[data],
+            solver=cpd_solver,
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        per_threshold: list[tuple[float, dict[str, Any]]] = outcome[first_key]
+
+        assert len(per_threshold) == 3
+        # Entries are expected in the same order as the requested thresholds.
+        assert [threshold for threshold, _ in per_threshold] == grid
+        # Each entry must expose the metric under the "arl" key.
+        assert all("arl" in arl_metrics for _, arl_metrics in per_threshold)
+
+    def test_run_arl_aggregated_across_providers(self) -> None:
+        """Run lengths from all providers are pooled into one ARL value.
+
+        Setup: return_sequence=[0.0, 5.0], threshold=3.0, reset mode.
+        Each detection resets the algorithm, so the mocked statistic
+        repeats as 0, 5, 0, 5, ...
+
+        Within a provider a detection fires whenever the value is 5.0:
+        step 0 -> 0.0 (no), step 1 -> 5.0 (yes, reset),
+        step 2 -> 0.0 (no), step 3 -> 5.0 (yes, reset), and so on.
+
+        p1 (4 obs): detections at steps 1, 3.
+            Run lengths from 0: [1, 2]. (0->1 = 1, 1->3 = 2)
+        p2 (6 obs): detections at steps 1, 3, 5.
+            Run lengths from 0: [1, 2, 2].
+
+        Pooled run lengths: [1, 2, 1, 2, 2].  ARL = 8 / 5 = 1.6.
+        """
+        agg_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Agg", return_sequence=[0.0, 5.0])
+        short_series: LabeledData[float] = _make_provider(4, name="p1")
+        long_series: LabeledData[float] = _make_provider(6, name="p2")
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        # One algorithm and one threshold, evaluated against both providers.
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(agg_algo, [3.0])],
+            providers=[short_series, long_series],
+            solver=cpd_solver,
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        pooled_arl: float = arl_metrics["arl"]
+
+        # Pooled run lengths [1, 2, 1, 2, 2] sum to 8 over 5 detections.
+        target_arl: float = 8.0 / 5.0
+        assert abs(pooled_arl - target_arl) < 1e-10
+
+
+# ---------------------------------------------------------------------------
+# 4. Reset vs NoReset mode semantics
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerModeSemantics:
+    """Tests verifying different ARL behavior between reset and noreset modes."""
+
+    def test_reset_vs_noreset_produce_different_arl(self) -> None:
+        """Switching between reset and noreset modes changes the resulting ARL.
+
+        Setup: return_sequence=[0.0, 5.0, 0.0, 0.0], threshold=3.0, 20 observations.
+
+        Reset mode:
+            Step 0 -> 0.0 (no). Step 1 -> 5.0 (yes, reset).
+            After reset: Step 2 -> 0.0 (no). Step 3 -> 5.0 (yes, reset). Etc.
+            Detections at steps 1, 3, 5, 7, ..., 19.
+            Run lengths from 0: [1, 2, 2, 2, ...], hence ARL < 2.
+
+        NoReset mode:
+            The sequence cycles without restarting: 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, ...
+            Detections occur only where the value is 5.0,
+            so the ARL is expected to exceed the reset-mode ARL.
+        """
+        mode_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="ModeTest",
+            return_sequence=[0.0, 5.0, 0.0, 0.0],
+        )
+        data: LabeledData[float] = _make_provider(20)
+        shared_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        reset_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(mode_algo, [3.0])],
+            providers=[data],
+            solver=shared_solver,
+            mode="reset",
+        )
+        reset_outcome = reset_runner.run()
+        reset_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(reset_outcome))
+        reset_arl: float = reset_outcome[reset_key][0][1]["arl"]
+
+        noreset_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(mode_algo, [3.0])],
+            providers=[data],
+            solver=shared_solver,
+            mode="noreset",
+        )
+        noreset_outcome = noreset_runner.run()
+        noreset_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(noreset_outcome))
+        noreset_arl: float = noreset_outcome[noreset_key][0][1]["arl"]
+
+        # Both values have to be finite for the ordering check below to be meaningful.
+        assert math.isfinite(reset_arl) and math.isfinite(noreset_arl)
+        assert reset_arl < noreset_arl
+
+    def test_reset_mode_exact_arl_with_immediate_signal(self) -> None:
+        """Pin the exact reset-mode ARL for a signal on every second observation.
+
+        Setup: return_sequence=[0.0, 5.0, 0.0, 0.0], threshold=3.0.
+        In reset mode the sequence starts over after each signal.
+
+        Walk-through for 12 observations:
+        Step 0 -> 0.0 (no). Step 1 -> 5.0 (yes, reset).
+        Step 2 -> 0.0 (no). Step 3 -> 5.0 (yes, reset).
+        Step 4 -> 0.0 (no). Step 5 -> 5.0 (yes, reset).
+        Step 6 -> 0.0 (no). Step 7 -> 5.0 (yes, reset).
+        Step 8 -> 0.0 (no). Step 9 -> 5.0 (yes, reset).
+        Step 10 -> 0.0 (no). Step 11 -> 5.0 (yes, reset).
+
+        Detections at steps 1, 3, 5, 7, 9, 11.
+        Run lengths from 0: [1, 2, 2, 2, 2, 2].
+        ARL = 11 / 6.
+        """
+        immediate_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Immediate",
+            return_sequence=[0.0, 5.0, 0.0, 0.0],
+        )
+        data: LabeledData[float] = _make_provider(12)
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(immediate_algo, [3.0])],
+            providers=[data],
+            solver=OnlineCpdSolver(),
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        # Six run lengths [1, 2, 2, 2, 2, 2] add up to 11.
+        target_arl: float = 11.0 / 6.0
+        assert abs(observed_arl - target_arl) < 1e-10
+
+    def test_noreset_mode_exact_arl_with_periodic_signal(self) -> None:
+        """Pin the exact noreset-mode ARL for a signal that recurs every four observations.
+
+        Setup: return_sequence=[5.0, 0.0, 0.0, 0.0], threshold=3.0.
+        In noreset mode the sequence keeps cycling and is never restarted.
+        12 observations -> values: 5,0,0,0,5,0,0,0,5,0,0,0.
+        Detections at 1-based indices: 1, 5, 9 (where value=5.0).
+        Run lengths from 0: [1, 4, 4].
+        ARL = 9 / 3 = 3.0.
+        """
+        periodic_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Periodic",
+            return_sequence=[5.0, 0.0, 0.0, 0.0],
+        )
+        data: LabeledData[float] = _make_provider(12)
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+        target_arl: float = 9.0 / 3.0
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(periodic_algo, [3.0])],
+            providers=[data],
+            solver=cpd_solver,
+            mode="noreset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        assert abs(observed_arl - target_arl) < 1e-10
+
+    def test_noreset_lower_threshold_shorter_arl(self) -> None:
+        """In noreset mode a lower threshold fires more often and so shortens the ARL.
+
+        Setup: return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 24 observations.
+        threshold=1.5: detections where value > 1.5 -> indices with 2,3,4,5.
+        threshold=4.5: detections where value > 4.5 -> indices with 5 only.
+        """
+        gradual_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Gradual",
+            return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
+        )
+        data: LabeledData[float] = _make_provider(24)
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(gradual_algo, [1.5, 4.5])],
+            providers=[data],
+            solver=OnlineCpdSolver(),
+            mode="noreset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        per_threshold: list[tuple[float, dict[str, Any]]] = outcome[first_key]
+
+        # Index 0 is read as the 1.5 entry and index 1 as the 4.5 entry (order of the thresholds above).
+        low_arl: float = per_threshold[0][1]["arl"]
+        high_arl: float = per_threshold[1][1]["arl"]
+
+        assert math.isfinite(low_arl)
+        assert math.isfinite(high_arl)
+        assert low_arl < high_arl
+
+    def test_noreset_same_arl_for_same_threshold_different_runs(self) -> None:
+        """Two identically configured noreset runners must report the same ARL.
+
+        This checks determinism and, as originally intended, that the inf-trace is reused correctly.
+        """
+        stable_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Stable",
+            return_sequence=[0.0, 0.0, 5.0],
+        )
+        data: LabeledData[float] = _make_provider(15)
+        shared_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        first_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(stable_algo, [3.0])],
+            providers=[data],
+            solver=shared_solver,
+            mode="noreset",
+        )
+        second_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(stable_algo, [3.0])],
+            providers=[data],
+            solver=shared_solver,
+            mode="noreset",
+        )
+
+        first_outcome = first_runner.run()
+        second_outcome = second_runner.run()
+
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(first_outcome))
+        first_arl: float = first_outcome[first_key][0][1]["arl"]
+        second_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(second_outcome))
+        second_arl: float = second_outcome[second_key][0][1]["arl"]
+
+        assert first_arl == second_arl
+
+
+# ---------------------------------------------------------------------------
+# 5. max_runlength - forced resets
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerMaxRunlength:
+    """Tests for ARL interaction with solver max_runlength (forced change points)."""
+
+    def test_forced_detections_produce_finite_arl(self) -> None:
+        """Detections forced by max_runlength keep the ARL finite under an unreachable threshold."""
+        silent_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
+        data: LabeledData[float] = _make_provider(18)
+
+        # The threshold of 100.0 lies far above the mock's return_sequence=[0.0].
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(silent_algo, [100.0])],
+            providers=[data],
+            solver=OnlineCpdSolver(max_runlength=5),
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        assert math.isfinite(observed_arl)
+        assert observed_arl > 0
+
+    def test_exact_arl_with_max_runlength(self) -> None:
+        """Pin the exact ARL produced purely by forced detections (max_runlength=5, 18 observations).
+
+        max_runlength=5 forces a detection once run_length > 5, i.e. at step 5
+        (0-based; run_length becomes 6 there).
+        After the reset the next forced detections fall on step 11 and step 17.
+        Detections at steps 5, 11, 17.
+        Run lengths from 0: [5, 6, 6].  ARL = 17 / 3.
+        """
+        silent_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
+        data: LabeledData[float] = _make_provider(18)
+
+        # Threshold 100.0 is never reached, so only max_runlength can trigger detections.
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(silent_algo, [100.0])],
+            providers=[data],
+            solver=OnlineCpdSolver(max_runlength=5),
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        target_arl: float = 17.0 / 3.0
+        assert abs(observed_arl - target_arl) < 1e-10
+
+    def test_signal_before_forced_prevents_forced(self) -> None:
+        """Threshold signals that arrive before max_runlength leave no room for forced detections.
+
+        Setup: return_sequence=[0.0, 0.0, 5.0], threshold=3.0, max_runlength=10.
+        A signal fires every 3 steps, long before the run length could exceed 10.
+        Each signal resets the algorithm, so the sequence restarts.
+
+        18 obs -> detections at steps 2, 5, 8, 11, 14, 17.
+        Run lengths: [2, 3, 3, 3, 3, 3].  ARL = 17 / 6.
+        """
+        fast_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Fast", return_sequence=[0.0, 0.0, 5.0])
+        data: LabeledData[float] = _make_provider(18)
+        capped_solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=10)
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(fast_algo, [3.0])],
+            providers=[data],
+            solver=capped_solver,
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        # Six run lengths [2, 3, 3, 3, 3, 3] add up to 17.
+        assert abs(observed_arl - 17.0 / 6.0) < 1e-10
+
+    def test_max_runlength_noreset_inf_trace_still_forces(self) -> None:
+        """Compare ARL with and without max_runlength under an unreachable threshold.
+
+        The algorithm returns 0.0 on every step and the threshold is 100.0,
+        so the detection function itself never signals.
+
+        With max_runlength=4 the solver is expected to force detections
+        (run_length > 4), giving a finite ARL; with the default solver
+        no detection is expected at all, giving an infinite ARL.
+
+        NOTE(review): despite the "noreset" in the test name, both runners
+        below are built with mode="reset", so noreset mode and its inf-trace
+        are not exercised here. Either switch the mode to "noreset" (after
+        confirming how forced detections surface there) or rename the test.
+        """
+        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(15)
+        solver_forced: OnlineCpdSolver = OnlineCpdSolver(max_runlength=4)
+        solver_no_forced: OnlineCpdSolver = OnlineCpdSolver()
+
+        runner_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [100.0])],
+            providers=[provider],
+            solver=solver_forced,
+            mode="reset",
+        )
+        runner_no_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(algorithm, [100.0])],
+            providers=[provider],
+            solver=solver_no_forced,
+            mode="reset",
+        )
+
+        res_forced = runner_forced.run()
+        res_no_forced = runner_no_forced.run()
+
+        key_f: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_forced))
+        key_nf: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_no_forced))
+
+        arl_forced: float = res_forced[key_f][0][1]["arl"]
+        arl_no_forced: float = res_no_forced[key_nf][0][1]["arl"]
+
+        assert math.isfinite(arl_forced)
+        assert math.isinf(arl_no_forced)
+
+
+# ---------------------------------------------------------------------------
+# 6. Reset behavior - sequence restart verification
+# ---------------------------------------------------------------------------
+class TestARLBenchmarkRunnerResetBehavior:
+    """Tests verifying that algorithm reset after each detection affects ARL."""
+
+    def test_reset_restarts_return_sequence(self) -> None:
+        """A reset restarts return_sequence, which makes detections periodic.
+
+        Setup: return_sequence=[0.0, 5.0], threshold=3.0, reset mode, 8 observations.
+        Step 0: 0.0 (no). Step 1: 5.0 (yes, reset).
+        Step 2: 0.0 (no). Step 3: 5.0 (yes, reset). Etc.
+        Detections at steps 1, 3, 5, 7.
+        Run lengths: [1, 2, 2, 2].  ARL = 7 / 4 = 1.75.
+        """
+        reset_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Reset", return_sequence=[0.0, 5.0])
+        data: LabeledData[float] = _make_provider(8)
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(reset_algo, [3.0])],
+            providers=[data],
+            solver=OnlineCpdSolver(),
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        # Four run lengths [1, 2, 2, 2] add up to 7.
+        target_arl: float = 7.0 / 4.0
+        assert abs(observed_arl - target_arl) < 1e-10
+
+    def test_reset_restarts_learning_period(self) -> None:
+        """A reset re-enters the learning period, which widens the gap between detections.
+
+        Setup: return_sequence=[5.0], learning_period_size=2, threshold=3.0.
+        After each detection the algorithm is reset and spends 2 observations
+        learning (returning 0.0) before it returns 5.0 again.
+
+        Walk-through for 9 observations:
+        Step 0: learning (0.0). Step 1: learning (0.0).
+        Step 2: 5.0 (yes, reset).
+        Step 3: learning (0.0). Step 4: learning (0.0).
+        Step 5: 5.0 (yes, reset).
+        Step 6: learning (0.0). Step 7: learning (0.0).
+        Step 8: 5.0 (yes, reset).
+
+        Detections at steps 2, 5, 8.
+        Run lengths: [2, 3, 3].  ARL = 8 / 3.
+        """
+        learning_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Learn",
+            return_sequence=[5.0],
+            learning_period_size=2,
+        )
+        data: LabeledData[float] = _make_provider(9)
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(learning_algo, [3.0])],
+            providers=[data],
+            solver=OnlineCpdSolver(),
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        _threshold, arl_metrics = outcome[first_key][0]
+        observed_arl: float = arl_metrics["arl"]
+
+        # Three run lengths [2, 3, 3] add up to 8.
+        target_arl: float = 8.0 / 3.0
+        assert abs(observed_arl - target_arl) < 1e-10
+
+    def test_lower_threshold_produces_shorter_arl_reset(self) -> None:
+        """In reset mode a lower threshold fires sooner after each reset and so shortens the ARL.
+
+        Setup: return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 30 observations.
+        threshold=1.5: signal when value > 1.5, detections sooner after reset.
+        threshold=4.5: signal when value > 4.5, detections later after reset.
+        """
+        gradual_algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(
+            name="Gradual",
+            return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
+        )
+        data: LabeledData[float] = _make_provider(30)
+        cpd_solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        arl_runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
+            algorithms=[(gradual_algo, [1.5, 4.5])],
+            providers=[data],
+            solver=cpd_solver,
+            mode="reset",
+        )
+        outcome = arl_runner.run()
+        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
+        per_threshold: list[tuple[float, dict[str, Any]]] = outcome[first_key]
+
+        low_arl: float = per_threshold[0][1]["arl"]
+        high_arl: float = per_threshold[1][1]["arl"]
+
+        # Both values have to be finite for the ordering check below to be meaningful.
+        assert math.isfinite(low_arl) and math.isfinite(high_arl)
+        assert low_arl < high_arl

From 09183f7a1020c5ffbaa1db4201ef5304c9719903 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 04:41:54 +0300
Subject: [PATCH 14/15] feat: add BenchmarkAnalyzer

---
 .../benchmark/core/benchmark_analyzer.py      |  46 +++++-
 .../benchmark/core/test_benchmark_analyzer.py | 140 ++++++++++++++++++
 2 files changed, 182 insertions(+), 4 deletions(-)
 create mode 100644 tests/unit/benchmark/core/test_benchmark_analyzer.py

diff --git a/pysatl_cpd/benchmark/core/benchmark_analyzer.py b/pysatl_cpd/benchmark/core/benchmark_analyzer.py
index fb7a511..4fd5ab3 100644
--- a/pysatl_cpd/benchmark/core/benchmark_analyzer.py
+++ b/pysatl_cpd/benchmark/core/benchmark_analyzer.py
@@ -1,20 +1,58 @@
+# -*- coding: ascii -*-
+
+"""
+Benchmark analyzer module.
+
+This module provides a convenient wrapper to apply multiple aggregate metrics
+to a single batch of benchmark execution results.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
 from typing import Any
 
 from pysatl_cpd.analysis.labeled_data import LabeledData
 from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
-from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState
 from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
 
 
-class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[OnlineAlgorithmState], ProviderT: LabeledData[Any]]:
+class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]:
+    """
+    Evaluator for applying multiple metrics to a batch of benchmark runs.
+
+    This class encapsulates a dictionary of initialized metrics and provides
+    a single entry point to evaluate all of them on the given execution results.
+
+    Parameters
+    ----------
+    metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]]
+        A mapping of metric names to metric instances. The dict is kept by reference, not copied.
+    """
+
     def __init__(
         self,
         metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]],
     ) -> None:
-        return
+        self._metrics = metrics
 
     def analyze(
         self,
         runs: list[tuple[TraceT, ProviderT]],
     ) -> dict[str, Any]:
-        raise NotImplementedError("Method `analyze` is not implemented yet.")
+        """
+        Evaluate all registered metrics on the provided batch of runs.
+
+        Parameters
+        ----------
+        runs : list[tuple[TraceT, ProviderT]]
+            A batch of execution results, where each element is a pair of
+            (detection_trace, data_provider).
+
+        Returns
+        -------
+        dict[str, Any]
+            A mapping of each registered metric name to the value returned by its ``evaluate(runs)``.
+        """
+        return {metric_name: metric.evaluate(runs) for metric_name, metric in self._metrics.items()}
diff --git a/tests/unit/benchmark/core/test_benchmark_analyzer.py b/tests/unit/benchmark/core/test_benchmark_analyzer.py
new file mode 100644
index 0000000..175fc81
--- /dev/null
+++ b/tests/unit/benchmark/core/test_benchmark_analyzer.py
@@ -0,0 +1,140 @@
+# -*- coding: ascii -*-
+
+"""
+Tests for BenchmarkAnalyzer.
+
+Covers metric storage, evaluation routing, and edge cases with empty inputs.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from typing import Any
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.core.benchmark_analyzer import BenchmarkAnalyzer
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from tests.mocks.analysis.labeled_data import MockLabeledData
+from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric
+from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric
+from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace
+
+
+class TestBenchmarkAnalyzerInit:
+    """Constructor behaviour of BenchmarkAnalyzer."""
+
+    def test_init_stores_metrics(self) -> None:
+        """The analyzer keeps the very dict object it was constructed with."""
+        run_metric: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[1.0])
+        registry: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = {
+            "m1": MockAggregationMetric(base=run_metric),
+        }
+
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=registry)
+        # Identity check, not equality: the stored object must be the dict passed in.
+        assert subject._metrics is registry
+
+
+class TestBenchmarkAnalyzerAnalyze:
+    """Tests for BenchmarkAnalyzer.analyze."""
+
+    def test_analyze_evaluates_all_metrics(self) -> None:
+        """Every registered metric is evaluated and its result is returned under the metric's name."""
+        raw_a: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[2.0, 3.0])
+        raw_b: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[10.0, 20.0])
+        agg_a: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=raw_a)
+        agg_b: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=raw_b)
+        registry: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = {
+            "sum_small": agg_a,
+            "sum_big": agg_b,
+        }
+
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=registry)
+
+        trace_a: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[])
+        trace_b: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[])
+        data_a: MockLabeledData = MockLabeledData(change_points=[], name="d1")
+        data_b: MockLabeledData = MockLabeledData(change_points=[], name="d2")
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [
+            (trace_a, data_a),
+            (trace_b, data_b),
+        ]
+
+        report: dict[str, Any] = subject.analyze(batch)
+
+        # Each name must be present and hold the sum of its base metric's per-run values.
+        expected: dict[str, float] = {"sum_small": 2.0 + 3.0, "sum_big": 10.0 + 20.0}
+        for metric_name, total in expected.items():
+            assert metric_name in report and report[metric_name] == total
+
+    def test_analyze_passes_runs_to_base_metric(self) -> None:
+        """The base metric wrapped by the aggregation receives the very run objects passed in."""
+        probe: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[1.0])
+        agg: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=probe)
+        registry: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = {
+            "m": agg,
+        }
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=registry)
+
+        trace: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[])
+        data: MockLabeledData = MockLabeledData(change_points=[], name="d")
+
+        subject.analyze([(trace, data)])
+
+        assert len(probe.calls) == 1
+        # Identity checks: the recorded call must hold the same objects, not copies.
+        first_call = probe.calls[0]
+        assert first_call[0] is trace
+        assert first_call[1] is data
+
+    def test_analyze_with_empty_metrics(self) -> None:
+        """With no registered metrics the analyzer returns an empty dict."""
+        trace: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[])
+        data: MockLabeledData = MockLabeledData(change_points=[], name="d")
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [(trace, data)]
+
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics={})
+        report: dict[str, Any] = subject.analyze(batch)
+
+        assert report == {}
+
+    def test_analyze_with_empty_runs(self) -> None:
+        """An empty batch is forwarded to the metrics and their results are still returned."""
+        stub: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[99.0])
+        agg: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=stub)
+        registry: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = {
+            "m": agg,
+        }
+
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=registry)
+        report: dict[str, Any] = subject.analyze([])
+
+        # Expected: one aggregation call over an empty list of per-run values, yielding 0.0.
+        assert report == {"m": 0.0}
+        assert len(agg.aggregate_calls) == 1
+        assert agg.aggregate_calls[0] == []
+
+    def test_analyze_with_multiple_runs_aggregates_correctly(self) -> None:
+        """The aggregation metric receives every per-run result and sums them."""
+        stub: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[1.0, 2.0, 3.0])
+        agg: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=stub)
+        registry: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = {
+            "total": agg,
+        }
+
+        subject: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=registry)
+
+        # Three runs that differ only in the provider name.
+        batch: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [
+            (MockOnlineDetectionTrace([]), MockLabeledData([], name=label)) for label in ("a", "b", "c")
+        ]
+
+        report: dict[str, Any] = subject.analyze(batch)
+
+        assert report == {"total": 6.0}
+        # A single aggregation call must see all three values; the base metric runs once per run.
+        assert len(agg.aggregate_calls) == 1
+        assert agg.aggregate_calls[0] == [1.0, 2.0, 3.0]
+        assert len(stub.calls) == 3

From a971d7f75eb8c9001f8c5c5300220c095ef3a037 Mon Sep 17 00:00:00 2001
From: iraedeus <dtotjmyanin@mail.ru>
Date: Tue, 14 Apr 2026 04:52:19 +0300
Subject: [PATCH 15/15] test: BenchmarkExecutor

---
 .../benchmark/core/test_benchmark_executor.py | 515 ++++++++++++++++++
 1 file changed, 515 insertions(+)
 create mode 100644 tests/unit/benchmark/core/test_benchmark_executor.py

diff --git a/tests/unit/benchmark/core/test_benchmark_executor.py b/tests/unit/benchmark/core/test_benchmark_executor.py
new file mode 100644
index 0000000..9261b1e
--- /dev/null
+++ b/tests/unit/benchmark/core/test_benchmark_executor.py
@@ -0,0 +1,515 @@
+# -*- coding: ascii -*-
+"""
+Tests for BenchmarkExecutor and BenchmarkRecord.
+
+Covers result count for various combinations, trace content verification,
+record metadata, and disk caching behavior.
+"""
+
+__author__ = "Danil Totmyanin"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+import csv
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+from pysatl_cpd.analysis.labeled_data import LabeledData
+from pysatl_cpd.benchmark.core.benchmark_executor import (
+    BenchmarkExecutor,
+    BenchmarkRecord,
+)
+from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm
+
+
+def _make_provider(
+    length: int,
+    name: str = "test_data",
+) -> LabeledData[float]:
+    """Build a LabeledData provider holding ``length`` copies of 1.0 and no change points.
+
+    Parameters
+    ----------
+    length : int
+        Number of observations to generate.
+    name : str
+        Value passed to ``LabeledData`` as ``name``; defaults to ``"test_data"``.
+
+    Returns
+    -------
+    LabeledData[float]
+        Provider with ``length`` observations of 1.0 and no change points.
+    """
+    return LabeledData(raw_data=[1.0] * length, change_points=[], name=name)
+
+
+# ---------------------------------------------------------------------------
+# 1. BenchmarkRecord
+# ---------------------------------------------------------------------------
+class TestBenchmarkRecord:
+    """Unit tests for BenchmarkRecord: the ``key`` property and the ``trace_path`` default."""
+
+    def test_key_returns_correct_tuple(self) -> None:
+        """``key`` should equal (algorithm, configuration_hash, data, threshold); trace_path is excluded."""
+        record: BenchmarkRecord = BenchmarkRecord(
+            algorithm="TestAlgo",
+            configuration_hash=42,
+            data="dataset",
+            threshold=2.5,
+            trace_path="/tmp/trace.pkl",
+        )
+        expected: tuple[str, int, str, float] = ("TestAlgo", 42, "dataset", 2.5)
+        assert record.key == expected
+
+    def test_default_trace_path_is_none(self) -> None:
+        """Omitting trace_path at construction should leave it as None."""
+        record: BenchmarkRecord = BenchmarkRecord(
+            algorithm="A",
+            configuration_hash=0,
+            data="d",
+            threshold=1.0,
+        )
+        assert record.trace_path is None
+
+
+# ---------------------------------------------------------------------------
+# 2. Basic execution - result counts
+# ---------------------------------------------------------------------------
+class TestBenchmarkExecutorBasic:
+    """execute() should return one result per (algorithm, threshold, provider) combination."""
+
+    def test_single_combination(self) -> None:
+        """1 algorithm x 1 threshold x 1 provider -> 1 result."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = executor.execute()
+        assert len(results) == 1
+
+    def test_multiple_thresholds(self) -> None:
+        """1 algorithm x 3 thresholds x 1 provider -> 3 results."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0, 2.0, 3.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        assert len(results) == 3  # one result per threshold
+
+    def test_multiple_providers(self) -> None:
+        """1 algorithm x 1 threshold x 3 providers -> 3 results."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        providers: list[LabeledData[float]] = [
+            _make_provider(5, name="p1"),
+            _make_provider(5, name="p2"),
+            _make_provider(5, name="p3"),
+        ]
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=providers,
+            solver=solver,
+        )
+        results = executor.execute()
+        assert len(results) == 3  # one result per provider
+
+    def test_multiple_algorithms(self) -> None:
+        """2 algorithms x 1 threshold each x 1 provider -> 2 results."""
+        algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0])
+        algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", return_sequence=[1.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo1, [1.0]), (algo2, [2.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        assert len(results) == 2  # one result per algorithm
+
+    def test_cartesian_product(self) -> None:
+        """2 algorithms x 2 thresholds each x 2 providers -> 8 results."""
+        algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0])
+        algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", return_sequence=[0.0])
+        providers: list[LabeledData[float]] = [
+            _make_provider(5, name="p1"),
+            _make_provider(5, name="p2"),
+        ]
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo1, [1.0, 2.0]), (algo2, [3.0, 4.0])],
+            providers=providers,
+            solver=solver,
+        )
+        results = executor.execute()
+        assert len(results) == 8  # (2 + 2) algorithm/threshold pairs x 2 providers
+
+    def test_empty_algorithms(self) -> None:
+        """No algorithms -> execute() returns an empty list."""
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        assert results == []
+
+    def test_empty_providers(self) -> None:
+        """No providers -> execute() returns an empty list."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[],
+            solver=solver,
+        )
+        results = executor.execute()
+        assert results == []
+
+    def test_empty_thresholds(self) -> None:
+        """An algorithm paired with an empty threshold list contributes no results."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        assert results == []
+
+
+# ---------------------------------------------------------------------------
+# 3. Trace content
+# ---------------------------------------------------------------------------
+class TestBenchmarkExecutorTraceContent:
+    """Tests for the OnlineDetectionTrace returned with each result (second tuple element)."""
+
+    def test_detections_at_correct_steps(self) -> None:
+        """Verify detected change points match expected steps.
+
+        Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations.
+        Step 0: 0.0 (no), Step 1: 0.0 (no), Step 2: 5.0 (yes, reset).
+        Step 3: 0.0 (no), Step 4: 0.0 (no), Step 5: 5.0 (yes, reset).
+        Detections at steps 2 and 5 (0-based).
+        """
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0])
+        provider: LabeledData[float] = _make_provider(6)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [3.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        trace: OnlineDetectionTrace[Any] = results[0][1]
+
+        assert list(trace.detected_change_points) == [2, 5]  # 0-based step indices
+
+    def test_no_detections_with_high_threshold(self) -> None:
+        """Mock statistic 5.0 with threshold 100.0 should produce no detected change points."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[5.0])
+        provider: LabeledData[float] = _make_provider(10)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [100.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        trace: OnlineDetectionTrace[Any] = results[0][1]
+
+        assert list(trace.detected_change_points) == []
+
+    def test_trace_algorithm_name(self) -> None:
+        """trace.algorithm_name should equal str(algo) for the algorithm that was executed."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="NamedAlgo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        trace: OnlineDetectionTrace[Any] = results[0][1]
+
+        assert trace.algorithm_name == str(algo)
+
+    def test_detection_function_values(self) -> None:
+        """Detection function array should contain correct statistic values.
+
+        Algorithm [1.0, 2.0, 3.0], threshold=inf (no detections/resets).
+        6 observations -> values cycle: [1, 2, 3, 1, 2, 3].
+        """
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[1.0, 2.0, 3.0])
+        provider: LabeledData[float] = _make_provider(6)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [float("inf")])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        trace: OnlineDetectionTrace[Any] = results[0][1]
+
+        expected: list[float] = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+        np.testing.assert_array_almost_equal(trace.detection_function, expected)
+
+
+# ---------------------------------------------------------------------------
+# 4. Record content
+# ---------------------------------------------------------------------------
+class TestBenchmarkExecutorRecordContent:
+    """Tests for the BenchmarkRecord returned with each result (first tuple element)."""
+
+    def test_record_fields_match_input(self) -> None:
+        """Record should hold str(algo), hash(algo.configuration), the provider name and the threshold."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="RecAlgo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="my_data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [7.5])],
+            providers=[provider],
+            solver=solver,
+        )
+        results = executor.execute()
+        record: BenchmarkRecord = results[0][0]
+
+        assert record.algorithm == str(algo)
+        assert record.configuration_hash == hash(algo.configuration)
+        assert record.data == "my_data"
+        assert record.threshold == 7.5
+
+    def test_record_trace_path_none_without_dump_dir(self) -> None:
+        """trace_path should be None when dump_dir is explicitly None."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5)
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=None,
+        )
+        results = executor.execute()
+        record: BenchmarkRecord = results[0][0]
+
+        assert record.trace_path is None
+
+    def test_record_trace_path_set_with_dump_dir(self, tmp_path: Path) -> None:
+        """With dump_dir set, trace_path should name an existing file ending in ``.pkl``."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        results = executor.execute()
+        record: BenchmarkRecord = results[0][0]
+
+        assert record.trace_path is not None
+        assert Path(record.trace_path).exists()
+        assert record.trace_path.endswith(".pkl")
+
+
+# ---------------------------------------------------------------------------
+# 5. Caching
+# ---------------------------------------------------------------------------
+class TestBenchmarkExecutorCaching:
+    """Tests for dump_dir behaviour: the benchmark_registry.csv registry and the *.pkl trace files."""
+
+    def test_creates_registry_and_pickle_files(self, tmp_path: Path) -> None:
+        """One run with dump_dir should create benchmark_registry.csv and exactly one *.pkl file."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor.execute()
+
+        registry_path: Path = tmp_path / "benchmark_registry.csv"
+        assert registry_path.exists()
+
+        pkl_files: list[Path] = list(tmp_path.glob("*.pkl"))
+        assert len(pkl_files) == 1
+
+    def test_cache_prevents_reprocessing(self, tmp_path: Path) -> None:
+        """Second execute should load from cache without calling solver.
+
+        MockOnlineAlgorithm._call_history accumulates across reset() calls
+        and is never cleared. If caching works, second execute adds no
+        new entries.
+        """
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        # First run - solver executes, algorithm processes observations
+        executor1: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor1.execute()
+        history_after_first: int = len(algo.get_call_history())
+        assert history_after_first == 5  # 5 observations -> 5 recorded calls
+
+        # Second run - same algorithm, provider, threshold and dump_dir, so it should load from cache
+        executor2: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor2.execute()
+        history_after_second: int = len(algo.get_call_history())
+
+        assert history_after_second == history_after_first
+
+    def test_cached_trace_matches_original(self, tmp_path: Path) -> None:
+        """Cached trace should match the original: change points, algorithm_name, detection_function.
+
+        Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations.
+        Detections at steps 2 and 5.
+        """
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0])
+        provider: LabeledData[float] = _make_provider(6, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor1: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [3.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        results1 = executor1.execute()
+
+        executor2: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [3.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        results2 = executor2.execute()
+
+        trace1: OnlineDetectionTrace[Any] = results1[0][1]
+        trace2: OnlineDetectionTrace[Any] = results2[0][1]
+
+        assert list(trace1.detected_change_points) == list(trace2.detected_change_points)
+        assert trace1.algorithm_name == trace2.algorithm_name
+        np.testing.assert_array_almost_equal(trace1.detection_function, trace2.detection_function)
+
+    def test_registry_csv_has_correct_structure(self, tmp_path: Path) -> None:
+        """Registry CSV should have exactly the five expected columns and one row matching the run."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="CsvAlgo", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="csv_data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [2.5])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor.execute()
+
+        registry_path: Path = tmp_path / "benchmark_registry.csv"
+        with open(registry_path, encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            rows: list[dict[str, str]] = list(reader)
+
+        assert len(rows) == 1
+        row: dict[str, str] = rows[0]
+
+        expected_columns: set[str] = {
+            "algorithm",
+            "configuration_hash",
+            "data",
+            "threshold",
+            "trace_path",
+        }
+        assert set(row.keys()) == expected_columns
+        assert row["algorithm"] == str(algo)
+        assert row["data"] == "csv_data"
+        assert float(row["threshold"]) == 2.5  # CSV cells are strings; parse before comparing
+        assert row["trace_path"] != ""  # NOTE(review): the configuration_hash cell value is not checked
+
+    def test_inf_threshold_in_pickle_filename(self, tmp_path: Path) -> None:
+        """Pickle filename for infinite threshold should contain 'inf'."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [float("inf")])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor.execute()
+
+        pkl_files: list[Path] = list(tmp_path.glob("*.pkl"))
+        assert len(pkl_files) == 1
+        assert "inf" in pkl_files[0].name
+
+    def test_multiple_thresholds_create_separate_pickle_files(self, tmp_path: Path) -> None:
+        """Three thresholds should produce three *.pkl files and three registry rows."""
+        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0])
+        provider: LabeledData[float] = _make_provider(5, name="data")
+        solver: OnlineCpdSolver = OnlineCpdSolver()
+
+        executor: BenchmarkExecutor[float] = BenchmarkExecutor(
+            algorithms=[(algo, [1.0, 2.0, 3.0])],
+            providers=[provider],
+            solver=solver,
+            dump_dir=tmp_path,
+        )
+        executor.execute()
+
+        pkl_files: list[Path] = list(tmp_path.glob("*.pkl"))
+        assert len(pkl_files) == 3
+
+        registry_path: Path = tmp_path / "benchmark_registry.csv"
+        with open(registry_path, encoding="utf-8") as f:
+            rows: list[dict[str, str]] = list(csv.DictReader(f))
+        assert len(rows) == 3