From 99ac199847b44b11ffc737e2c84655ce9d5c15d0 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 7 Apr 2026 05:04:35 +0300 Subject: [PATCH 01/15] feat: add benchmark executor --- pysatl_cpd/benchmark/benchmark_executor.py | 116 +++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 pysatl_cpd/benchmark/benchmark_executor.py diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py new file mode 100644 index 0000000..924bdc0 --- /dev/null +++ b/pysatl_cpd/benchmark/benchmark_executor.py @@ -0,0 +1,116 @@ +import csv +import math +import pickle +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from pysatl_cpd.core.data_providers.idata_provider import DataProvider +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +@dataclass +class BenchmarkRecord: + algorithm: str + configuration_hash: str + data: str + threshold: float + trace_path: str | None = None + + @property + def key(self) -> tuple[str, str, str, float]: + return (self.algorithm, self.configuration_hash, self.data, self.threshold) + + +class BenchmarkExecutor[DataT]: + def __init__( + self, + algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: list[DataProvider[DataT]], + solver: OnlineCpdSolver, + dump_dir: str | Path | None = None, + ) -> None: + self.__algorithms = algorithms + self.__providers = providers + self.__solver = solver + self.__dump_dir = Path(dump_dir) if dump_dir is not None else None + + def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: + results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = [] + registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {} + registry_path: Path | None = None + + if 
self.__dump_dir is not None: + self.__dump_dir.mkdir(parents=True, exist_ok=True) + registry_path = self.__dump_dir / "benchmark_registry.csv" + + if registry_path.exists(): + with open(registry_path, encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + record = BenchmarkRecord( + algorithm=row["algorithm"], + configuration_hash=row["configuration_hash"], + data=row["data"], + threshold=float(row["threshold"]), + trace_path=row["trace_path"] if row["trace_path"] else None, + ) + registry[record.key] = record + + for algorithm, thresholds in self.__algorithms: + algo_name = str(algorithm) + config_hash = str(hash(algo_name)) + + for provider in self.__providers: + data_name = provider.name + + for threshold in thresholds: + key = (algo_name, config_hash, data_name, float(threshold)) + + if key in registry and registry[key].trace_path: + trace_file = Path(registry[key].trace_path) # type: ignore + if trace_file.exists(): + with open(trace_file, "rb") as f: + trace = pickle.load(f) + results.append((registry[key], trace)) + continue + + steps = list(self.__solver.run(algorithm, provider, threshold)) + trace = OnlineDetectionTrace.from_run(steps) + + record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None) + + if self.__dump_dir is not None: + safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name) + thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_") + filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl" + + trace_path = self.__dump_dir / filename + with open(trace_path, "wb") as f: + pickle.dump(trace, f) + + record.trace_path = str(trace_path) + registry[key] = record + + results.append((record, trace)) + + if registry_path is not None: + fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"] + with open(registry_path, mode="w", encoding="utf-8", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + 
writer.writeheader() + for rec in registry.values(): + writer.writerow( + { + "algorithm": rec.algorithm, + "configuration_hash": rec.configuration_hash, + "data": rec.data, + "threshold": rec.threshold, + "trace_path": rec.trace_path or "", + } + ) + + return results From 74c086fb7f32e10834da05dfac56265e09395a4f Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 7 Apr 2026 05:13:01 +0300 Subject: [PATCH 02/15] docs(benchmark): benchmark executor --- pysatl_cpd/benchmark/benchmark_executor.py | 126 +++++++++++++++++---- 1 file changed, 104 insertions(+), 22 deletions(-) diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py index 924bdc0..a951a01 100644 --- a/pysatl_cpd/benchmark/benchmark_executor.py +++ b/pysatl_cpd/benchmark/benchmark_executor.py @@ -1,4 +1,19 @@ +# -*- coding: ascii -*- +""" +Benchmark execution module for change-point detection algorithms. + +This module provides the core components for running and caching performance +evaluations of online CPD algorithms across multiple datasets and threshold +configurations. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + import csv +import hashlib +import itertools import math import pickle from collections.abc import Sequence @@ -14,6 +29,27 @@ @dataclass class BenchmarkRecord: + """ + Metadata container for a single benchmark execution. + + This record uniquely identifies a benchmark run and stores the path + to the cached trace file if disk dumping is enabled. + + Parameters + ---------- + algorithm : str + The string identifier or name of the online algorithm. + configuration_hash : str + A hash string representing the algorithm's configuration. + data : str + The identifier or name of the dataset. + threshold : float + The detection threshold used for this specific run. 
+ trace_path : str | None, default=None + Absolute or relative path to the serialized detection trace file, + if caching is enabled. + """ + algorithm: str configuration_hash: str data: str @@ -22,10 +58,41 @@ class BenchmarkRecord: @property def key(self) -> tuple[str, str, str, float]: + """ + Get the unique composite key for this benchmark run. + + Returns + ------- + tuple[str, str, str, float] + A tuple containing (algorithm, configuration_hash, data, threshold) + used for identifying the record in the registry. + """ return (self.algorithm, self.configuration_hash, self.data, self.threshold) class BenchmarkExecutor[DataT]: + """ + Orchestrator for executing change-point detection benchmarks. + + Evaluates a set of algorithms across multiple data providers and thresholds + using a provided online solver. Supports a caching mechanism via disk dumping + to prevent redundant calculations on subsequent runs. + + Parameters + ---------- + algorithms : list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + A list of tuples, where each tuple contains an instantiated online + algorithm and a sequence of thresholds to test it against. + providers : list[DataProvider[DataT]] + A list of data providers to be fed into the algorithms. + solver : OnlineCpdSolver + The solver instance responsible for iterating over the data providers + and running the algorithmic logic. + dump_dir : str | Path | None, optional + Directory path where the benchmark registry (CSV) and serialized traces + (Pickle files) should be stored. If None, caching is disabled. + """ + def __init__( self, algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], @@ -39,6 +106,21 @@ def __init__( self.__dump_dir = Path(dump_dir) if dump_dir is not None else None def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: + """ + Execute the benchmark over all combinations of algorithms, data, and thresholds. 
+ + Iterates through the combinations of algorithms, datasets, and thresholds. + If disk caching (`dump_dir`) is enabled, it attempts to load previously + calculated traces from the registry to bypass solver execution. If a trace + is missing, it runs the solver, caches the resulting trace to disk, and + updates the CSV registry. + + Returns + ------- + list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] + A list of execution results, where each element is a pair containing + the benchmark metadata record and the corresponding detection trace. + """ results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = [] registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {} registry_path: Path | None = None @@ -60,42 +142,42 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: ) registry[record.key] = record - for algorithm, thresholds in self.__algorithms: + for (algorithm, thresholds), provider in itertools.product(self.__algorithms, self.__providers): algo_name = str(algorithm) - config_hash = str(hash(algo_name)) - - for provider in self.__providers: - data_name = provider.name + config_hash = str(hashlib.md5(algo_name.encode("utf-8")).hexdigest()[:8]) + data_name = provider.name - for threshold in thresholds: - key = (algo_name, config_hash, data_name, float(threshold)) + for threshold in thresholds: + key = (algo_name, config_hash, data_name, float(threshold)) - if key in registry and registry[key].trace_path: - trace_file = Path(registry[key].trace_path) # type: ignore + if key in registry: + cached_path = registry[key].trace_path + if cached_path is not None: + trace_file = Path(cached_path) if trace_file.exists(): with open(trace_file, "rb") as f: trace = pickle.load(f) results.append((registry[key], trace)) continue - steps = list(self.__solver.run(algorithm, provider, threshold)) - trace = OnlineDetectionTrace.from_run(steps) + steps = list(self.__solver.run(algorithm, provider, threshold)) + trace = 
OnlineDetectionTrace.from_run(steps) - record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None) + record = BenchmarkRecord(algo_name, config_hash, data_name, threshold, None) - if self.__dump_dir is not None: - safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name) - thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_") - filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl" + if self.__dump_dir is not None: + safe_data_name = "".join(c if c.isalnum() else "_" for c in data_name) + thr_str = "inf" if math.isinf(record.threshold) else f"{threshold:.4f}".replace(".", "_") + filename = f"{algo_name}_{config_hash}_{safe_data_name}_{thr_str}.pkl" - trace_path = self.__dump_dir / filename - with open(trace_path, "wb") as f: - pickle.dump(trace, f) + trace_path = self.__dump_dir / filename + with open(trace_path, "wb") as f: + pickle.dump(trace, f) - record.trace_path = str(trace_path) - registry[key] = record + record.trace_path = str(trace_path) + registry[key] = record - results.append((record, trace)) + results.append((record, trace)) if registry_path is not None: fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"] From 553bbfe65f62a11b0cd7e13299c658663d193d95 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Sat, 11 Apr 2026 02:52:42 +0300 Subject: [PATCH 03/15] refactor: BenchmarkRecord and configuration_hash calculation --- pysatl_cpd/benchmark/benchmark_executor.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/benchmark_executor.py index a951a01..dec6416 100644 --- a/pysatl_cpd/benchmark/benchmark_executor.py +++ b/pysatl_cpd/benchmark/benchmark_executor.py @@ -12,7 +12,6 @@ __license__ = "SPDX-License-Identifier: MIT" import csv -import hashlib import itertools import math import pickle @@ -51,19 +50,19 @@ class BenchmarkRecord: """ algorithm: str 
- configuration_hash: str + configuration_hash: int data: str threshold: float trace_path: str | None = None @property - def key(self) -> tuple[str, str, str, float]: + def key(self) -> tuple[str, int, str, float]: """ Get the unique composite key for this benchmark run. Returns ------- - tuple[str, str, str, float] + tuple[str, int, str, float] A tuple containing (algorithm, configuration_hash, data, threshold) used for identifying the record in the registry. """ @@ -122,7 +121,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: the benchmark metadata record and the corresponding detection trace. """ results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = [] - registry: dict[tuple[str, str, str, float], BenchmarkRecord] = {} + registry: dict[tuple[str, int, str, float], BenchmarkRecord] = {} registry_path: Path | None = None if self.__dump_dir is not None: @@ -135,7 +134,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: for row in reader: record = BenchmarkRecord( algorithm=row["algorithm"], - configuration_hash=row["configuration_hash"], + configuration_hash=int(row["configuration_hash"]), data=row["data"], threshold=float(row["threshold"]), trace_path=row["trace_path"] if row["trace_path"] else None, @@ -144,7 +143,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: for (algorithm, thresholds), provider in itertools.product(self.__algorithms, self.__providers): algo_name = str(algorithm) - config_hash = str(hashlib.md5(algo_name.encode("utf-8")).hexdigest()[:8]) + config_hash = hash(algorithm.configuration) data_name = provider.name for threshold in thresholds: @@ -161,7 +160,7 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: continue steps = list(self.__solver.run(algorithm, provider, threshold)) - trace = OnlineDetectionTrace.from_run(steps) + trace = OnlineDetectionTrace.from_run(steps, algo_name, config_hash) record = 
BenchmarkRecord(algo_name, config_hash, data_name, threshold, None) From 58abc68642531c5349a414b58b6852cf0adc6fea Mon Sep 17 00:00:00 2001 From: iraedeus Date: Sun, 12 Apr 2026 18:12:36 +0300 Subject: [PATCH 04/15] refactor: move benchmark_executor to benchmark/core module --- pysatl_cpd/benchmark/{ => core}/benchmark_executor.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pysatl_cpd/benchmark/{ => core}/benchmark_executor.py (100%) diff --git a/pysatl_cpd/benchmark/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py similarity index 100% rename from pysatl_cpd/benchmark/benchmark_executor.py rename to pysatl_cpd/benchmark/core/benchmark_executor.py From dde042b35f63503318251600cdaa0e5d7859fdd1 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Mon, 13 Apr 2026 11:17:21 +0300 Subject: [PATCH 05/15] feat: add interfaces and signatures for benchmark module --- pysatl_cpd/benchmark/arl_benchmark_runner.py | 29 ++++++++++++ .../benchmark/core/benchmark_analyzer.py | 20 +++++++++ .../noreset/noreset_benchmark_runner.py | 39 ++++++++++++++++ .../noreset/noreset_detection_trace.py | 13 ++++++ .../benchmark/noreset/threshold_policy.py | 45 +++++++++++++++++++ .../benchmark/online_benchmark_runner.py | 36 +++++++++++++++ .../benchmark/reset_benchmark_runner.py | 31 +++++++++++++ 7 files changed, 213 insertions(+) create mode 100644 pysatl_cpd/benchmark/arl_benchmark_runner.py create mode 100644 pysatl_cpd/benchmark/core/benchmark_analyzer.py create mode 100644 pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py create mode 100644 pysatl_cpd/benchmark/noreset/noreset_detection_trace.py create mode 100644 pysatl_cpd/benchmark/noreset/threshold_policy.py create mode 100644 pysatl_cpd/benchmark/online_benchmark_runner.py create mode 100644 pysatl_cpd/benchmark/reset_benchmark_runner.py diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py new file mode 100644 index 0000000..8646aa4 ---
/dev/null +++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]( + OnlineBenchmarkRunner[TraceT, ProviderT] +): + def __init__( + self, + algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], + providers: list[ProviderT], + solver: OnlineCpdSolver, + dump_dir: Path | None = None, + ) -> None: + return + + def _collect_runs( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + threshold: float, + providers: list[ProviderT], + ) -> list[tuple[TraceT, ProviderT]]: + raise NotImplementedError("Method `_collect_runs` is not implemented yet.") diff --git a/pysatl_cpd/benchmark/core/benchmark_analyzer.py b/pysatl_cpd/benchmark/core/benchmark_analyzer.py new file mode 100644 index 0000000..fb7a511 --- /dev/null +++ b/pysatl_cpd/benchmark/core/benchmark_analyzer.py @@ -0,0 +1,20 @@ +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[OnlineAlgorithmState], ProviderT: LabeledData[Any]]: + def __init__( + self, + metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], + ) -> None: + return + + def analyze( + self, + runs: list[tuple[TraceT, ProviderT]], + ) -> dict[str, Any]: + raise NotImplementedError("Method `analyze` is 
not implemented yet.") diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py new file mode 100644 index 0000000..f1af0d0 --- /dev/null +++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py @@ -0,0 +1,39 @@ +from pathlib import Path +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace +from pysatl_cpd.benchmark.noreset.threshold_policy import ThresholdPolicy +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]): + def __init__( + self, + algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], + providers: list[ProviderT], + metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]], + solver: OnlineCpdSolver, + policy: ThresholdPolicy, + dump_dir: Path | None = None, + ) -> None: + return + + def _collect_runs( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + threshold: float, + providers: list[ProviderT], + ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]: + raise NotImplementedError("Method '_collect_runs' is not implemented yet.") + + def _get_inf_trace( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + provider: ProviderT, + ) -> OnlineDetectionTrace[Any]: + raise NotImplementedError("Method '_get_inf_trace' is not implemented yet.") diff --git a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py new file 
mode 100644 index 0000000..3e34f98 --- /dev/null +++ b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py @@ -0,0 +1,13 @@ +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class NoResetDetectionTrace[StateT: OnlineAlgorithmState](OnlineDetectionTrace[StateT]): + @classmethod + def from_inf_trace( + cls, + source_trace: OnlineDetectionTrace[StateT], + detected_change_points: list[int], + threshold: float, + ) -> "NoResetDetectionTrace[StateT]": + raise NotImplementedError("Method 'from_inf_trace' is not implemented yet.") diff --git a/pysatl_cpd/benchmark/noreset/threshold_policy.py b/pysatl_cpd/benchmark/noreset/threshold_policy.py new file mode 100644 index 0000000..5c83338 --- /dev/null +++ b/pysatl_cpd/benchmark/noreset/threshold_policy.py @@ -0,0 +1,45 @@ +from collections.abc import Sequence +from typing import Protocol, runtime_checkable + +from pysatl_cpd.core.typedefs import UnivariateNumericArray + + +@runtime_checkable +class ThresholdPolicy(Protocol): + def apply( + self, + detection_function: UnivariateNumericArray, + threshold: float, + change_points: Sequence[int], # true, 1-based + ) -> list[int]: ... 
# 1-based signal indices + + +class PointBasedPolicy: + def __init__(self, strict: bool = True) -> None: + return + + def apply( + self, + detection_function: UnivariateNumericArray, + threshold: float, + change_points: Sequence[int], # true, 1-based + ) -> list[int]: + raise NotImplementedError("Method `apply` is not implemented yet.") + + +class EventBasedPolicy: + def __init__( + self, + max_delay: int, + strict_edge: bool = True, + strict_point: bool = True, + ) -> None: + return + + def apply( + self, + detection_function: UnivariateNumericArray, + threshold: float, + change_points: Sequence[int], # true, 1-based + ) -> list[int]: + raise NotImplementedError("Method `apply` is not implemented yet.") diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py new file mode 100644 index 0000000..03391df --- /dev/null +++ b/pysatl_cpd/benchmark/online_benchmark_runner.py @@ -0,0 +1,36 @@ +# online_runner.py +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm, OnlineAlgorithmConfiguration +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](ABC): + def __init__( + self, + algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], + providers: list[ProviderT], + metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], + solver: OnlineCpdSolver, + dump_dir: Path | None = None, + ) -> None: + return + + @abstractmethod + def _collect_runs( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + threshold: float, + providers: list[ProviderT], + ) -> list[tuple[TraceT, 
ProviderT]]: + raise NotImplementedError("Method `_collect_runs` is not implemented yet.") + + def run( + self, + ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]: + raise NotImplementedError("Method `run` is not implemented yet.") diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py new file mode 100644 index 0000000..960044b --- /dev/null +++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py @@ -0,0 +1,31 @@ +from pathlib import Path +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]( + OnlineBenchmarkRunner[TraceT, ProviderT] +): + def __init__( + self, + algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], + providers: list[ProviderT], + metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], + solver: OnlineCpdSolver, + dump_dir: Path | None = None, + ) -> None: + return + + def _collect_runs( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + threshold: float, + providers: list[ProviderT], + ) -> list[tuple[TraceT, ProviderT]]: + raise NotImplementedError("Method `_collect_runs` is not implemented yet.") From e982f55eb6ff3cc709972ee0f63d06237cd5e144 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Mon, 13 Apr 2026 14:17:42 +0300 Subject: [PATCH 06/15] fix: bug in benchmark executor with saving registry --- .../benchmark/core/benchmark_executor.py | 30 +++++++++---------- .../classification/classification_report.py | 2 -- 2 files changed, 15 
insertions(+), 17 deletions(-) diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py index dec6416..15b6ddb 100644 --- a/pysatl_cpd/benchmark/core/benchmark_executor.py +++ b/pysatl_cpd/benchmark/core/benchmark_executor.py @@ -178,20 +178,20 @@ def execute(self) -> list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]]: results.append((record, trace)) - if registry_path is not None: - fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"] - with open(registry_path, mode="w", encoding="utf-8", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - for rec in registry.values(): - writer.writerow( - { - "algorithm": rec.algorithm, - "configuration_hash": rec.configuration_hash, - "data": rec.data, - "threshold": rec.threshold, - "trace_path": rec.trace_path or "", - } - ) + if registry_path is not None: + fieldnames = ["algorithm", "configuration_hash", "data", "threshold", "trace_path"] + with open(registry_path, mode="w", encoding="utf-8", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for rec in registry.values(): + writer.writerow( + { + "algorithm": rec.algorithm, + "configuration_hash": rec.configuration_hash, + "data": rec.data, + "threshold": rec.threshold, + "trace_path": rec.trace_path or "", + } + ) return results diff --git a/pysatl_cpd/benchmark/metrics/classification/classification_report.py b/pysatl_cpd/benchmark/metrics/classification/classification_report.py index be78890..f1ff239 100644 --- a/pysatl_cpd/benchmark/metrics/classification/classification_report.py +++ b/pysatl_cpd/benchmark/metrics/classification/classification_report.py @@ -57,6 +57,4 @@ def aggregate(self, values: Sequence[dict[str, float]]) -> dict[str, float]: recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0 f1 = 2 * precision * recall / (precision + recall) if (precision + recall) 
> 0 else 0.0 - print(total_fp) - return {"tp": total_tp, "fp": total_fp, "fn": total_fn, "precision": precision, "recall": recall, "f1": f1} From 39c690322b2be0ddccafde985424416c25fea032 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Mon, 13 Apr 2026 18:31:28 +0300 Subject: [PATCH 07/15] feat: add threshold policy for NoResetBenchmark --- .../benchmark/noreset/threshold_policy.py | 249 ++++++++++- .../noreset/test_threshold_policy.py | 416 ++++++++++++++++++ 2 files changed, 656 insertions(+), 9 deletions(-) create mode 100644 tests/unit/benchmark/noreset/test_threshold_policy.py diff --git a/pysatl_cpd/benchmark/noreset/threshold_policy.py b/pysatl_cpd/benchmark/noreset/threshold_policy.py index 5c83338..cae77e4 100644 --- a/pysatl_cpd/benchmark/noreset/threshold_policy.py +++ b/pysatl_cpd/benchmark/noreset/threshold_policy.py @@ -1,45 +1,276 @@ +# pysatl_cpd/benchmark/noreset/threshold_policy.py + +""" +Threshold policies for signal extraction in NoReset benchmark. + +This module provides the ThresholdPolicy protocol and two concrete +implementations: PointBasedPolicy and EventBasedPolicy. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from collections.abc import Sequence -from typing import Protocol, runtime_checkable +from typing import Protocol, cast, runtime_checkable + +import numpy as np from pysatl_cpd.core.typedefs import UnivariateNumericArray @runtime_checkable class ThresholdPolicy(Protocol): + """ + Protocol for signal extraction from a detection function. + + Implementations define how to convert a raw detection function array + into a list of signal indices given a threshold and known change points. + """ + def apply( self, detection_function: UnivariateNumericArray, threshold: float, - change_points: Sequence[int], # true, 1-based - ) -> list[int]: ... 
# 1-based signal indices + change_points: Sequence[int], + ) -> list[int]: + """ + Extract signal indices from the detection function. + + Parameters + ---------- + detection_function : UnivariateNumericArray + Array of detection statistic values, one per time step. + threshold : float + Detection threshold. + change_points : Sequence[int] + True change point indices (1-based). Used by some policies + to define delay windows. + + Returns + ------- + list[int] + 1-based indices where signals were detected. + """ + ... class PointBasedPolicy: + """ + Signal extraction policy based on point-wise threshold comparison. + + Any position where the detection function satisfies the threshold + condition is considered a signal. The change_points argument is + accepted for interface compatibility but is ignored. + + Parameters + ---------- + strict : bool, default=True + If True, signal condition is detection_function > threshold. + If False, signal condition is detection_function >= threshold. + """ + def __init__(self, strict: bool = True) -> None: - return + self.strict = strict + + @staticmethod + def _exceeds(arr: np.ndarray, threshold: float, strict: bool) -> np.ndarray: + """ + Check whether array values exceed threshold. + + Parameters + ---------- + arr : np.ndarray + Array of values to check. + threshold : float + Threshold value. + strict : bool + If True, uses strict inequality (>). + If False, uses non-strict inequality (>=). + + Returns + ------- + np.ndarray + Boolean array. + """ + return arr > threshold if strict else arr >= threshold def apply( self, detection_function: UnivariateNumericArray, threshold: float, - change_points: Sequence[int], # true, 1-based + change_points: Sequence[int], ) -> list[int]: - raise NotImplementedError("Method `apply` is not implemented yet.") + """ + Return 1-based indices where detection function exceeds threshold. + + Parameters + ---------- + detection_function : UnivariateNumericArray + Array of detection statistic values. 
+ threshold : float + Detection threshold. + change_points : Sequence[int] + Ignored. Present for interface compatibility. + + Returns + ------- + list[int] + Sorted list of 1-based signal indices. + """ + if len(detection_function) == 0: + return [] + + res = (np.where(self._exceeds(detection_function, threshold, self.strict))[0] + 1).tolist() + return cast(list[int], res) class EventBasedPolicy: + """ + Signal extraction policy based on rising-edge detection with delay windows. + + In normal (edge) mode, a signal is produced only when the detection + function crosses the threshold from below (rising edge). Inside delay + windows [true_cp, true_cp + max_delay] (1-based, inclusive), the policy + switches to point-based mode to correctly capture detection delay. + + The previous value used for edge detection (prev) is tracked continuously, + including values inside delay windows (variant A). This means that if the + detection function is above threshold at the end of a window, the first + element after the window will not produce an edge signal. + + For the first element, prev is treated as -inf (always below threshold). + + Parameters + ---------- + max_delay : int + Maximum allowable detection delay. Defines the right boundary of + the delay window as true_cp + max_delay (inclusive). Must be >= 0. + strict_edge : bool, default=True + If True, rising edge condition requires detection_function > threshold. + If False, condition is detection_function >= threshold. + prev is always checked with strict inequality (prev < threshold). + strict_point : bool, default=True + If True, point-based condition in delay window is + detection_function > threshold. + If False, condition is detection_function >= threshold. + + Raises + ------ + ValueError + If max_delay is negative. 
+ """ + def __init__( self, max_delay: int, strict_edge: bool = True, strict_point: bool = True, ) -> None: - return + if max_delay < 0: + raise ValueError(f"max_delay must be non-negative, got {max_delay}") + self.max_delay = max_delay + self.strict_edge = strict_edge + self.strict_point = strict_point + + @staticmethod + def _exceeds(arr: np.ndarray, threshold: float, strict: bool) -> np.ndarray: + """ + Check whether array values exceed threshold. + + Parameters + ---------- + arr : np.ndarray + Array of values to check. + threshold : float + Threshold value. + strict : bool + If True, uses strict inequality (>). + If False, uses non-strict inequality (>=). + + Returns + ------- + np.ndarray + Boolean array. + """ + return arr > threshold if strict else arr >= threshold + + def _build_window_mask( + self, + length: int, + change_points: Sequence[int], + ) -> np.ndarray: + """ + Build a boolean mask indicating which 0-based indices are in delay windows. + + Uses cumsum trick for fully vectorized computation over change points. + + Parameters + ---------- + length : int + Length of the detection function array. + change_points : Sequence[int] + True change point indices (1-based). + + Returns + ------- + np.ndarray + Boolean array of shape (length,) where True means the position + is inside a delay window. 
+ """ + if not change_points: + return np.zeros(length, dtype=bool) + + lefts = np.clip(np.array(change_points, dtype=int) - 1, 0, length - 1) + rights = np.clip(lefts + self.max_delay, 0, length - 1) + + marker = np.zeros(length + 1, dtype=int) + np.add.at(marker, lefts, 1) + np.add.at(marker, rights + 1, -1) + return np.cumsum(marker)[:length] > 0 def apply( self, detection_function: UnivariateNumericArray, threshold: float, - change_points: Sequence[int], # true, 1-based + change_points: Sequence[int], ) -> list[int]: - raise NotImplementedError("Method `apply` is not implemented yet.") + """ + Extract signal indices using rising-edge detection with delay windows. + + Fully vectorized implementation using numpy masks. + + Parameters + ---------- + detection_function : UnivariateNumericArray + Array of detection statistic values. + threshold : float + Detection threshold. + change_points : Sequence[int] + True change point indices (1-based). Used to define delay windows + where point-based mode is applied. + + Returns + ------- + list[int] + Sorted list of 1-based signal indices. 
+ """ + n = len(detection_function) + if n == 0: + return [] + + window_mask = self._build_window_mask(n, change_points) + + # prev[i] = df[i-1], prev[0] = -inf + prev = np.empty(n, dtype=detection_function.dtype) + prev[0] = float("-inf") + prev[1:] = detection_function[:-1] + + # edge signals: rising edge outside windows + edge = (prev < threshold) & self._exceeds(detection_function, threshold, self.strict_edge) & ~window_mask + + # point signals: threshold exceeded inside windows + point = self._exceeds(detection_function, threshold, self.strict_point) & window_mask + + res = (np.where(edge | point)[0] + 1).tolist() + return cast(list[int], res) diff --git a/tests/unit/benchmark/noreset/test_threshold_policy.py b/tests/unit/benchmark/noreset/test_threshold_policy.py new file mode 100644 index 0000000..cfd1ad4 --- /dev/null +++ b/tests/unit/benchmark/noreset/test_threshold_policy.py @@ -0,0 +1,416 @@ +# tests/benchmark/noreset/test_threshold_policy.py + +"""Tests for ThresholdPolicy implementations.""" + +import numpy as np +import pytest + +from pysatl_cpd.benchmark.noreset.threshold_policy import ( + EventBasedPolicy, + PointBasedPolicy, + ThresholdPolicy, +) +from pysatl_cpd.core.typedefs import UnivariateNumericArray + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_df(*values: float) -> UnivariateNumericArray: + """Create a UnivariateNumericArray from float values.""" + return np.array(values, dtype=np.float64) + + +# --------------------------------------------------------------------------- +# TestThresholdProtocol +# --------------------------------------------------------------------------- + + +class TestThresholdProtocol: + """Tests that concrete policies satisfy the ThresholdPolicy protocol.""" + + def test_point_based_implements_protocol(self) -> None: + """PointBasedPolicy must be recognised as ThresholdPolicy at 
runtime.""" + policy: PointBasedPolicy = PointBasedPolicy() + assert isinstance(policy, ThresholdPolicy) + + def test_event_based_implements_protocol(self) -> None: + """EventBasedPolicy must be recognised as ThresholdPolicy at runtime.""" + policy: EventBasedPolicy = EventBasedPolicy(max_delay=5) + assert isinstance(policy, ThresholdPolicy) + + +# --------------------------------------------------------------------------- +# TestPointBasedPolicyInit +# --------------------------------------------------------------------------- + + +class TestPointBasedPolicyInit: + """Tests for PointBasedPolicy constructor.""" + + def test_default_strict_is_true(self) -> None: + """Default strict parameter must be True.""" + policy: PointBasedPolicy = PointBasedPolicy() + assert policy.strict is True + + def test_explicit_strict_false(self) -> None: + """Explicit strict=False must be stored correctly.""" + policy: PointBasedPolicy = PointBasedPolicy(strict=False) + assert policy.strict is False + + +# --------------------------------------------------------------------------- +# TestPointBasedPolicyApply +# --------------------------------------------------------------------------- + + +class TestPointBasedPolicyApply: + """Tests for PointBasedPolicy.apply — parametrized over common cases.""" + + @pytest.mark.parametrize( + "values, threshold, strict, change_points, expected", + [ + # no signals — all below threshold + ([0.1, 0.2, 0.3], 1.0, True, [], []), + # all signals — all strictly above threshold + ([2.0, 3.0, 4.0], 1.0, True, [], [1, 2, 3]), + # strict=True: equal value is NOT a signal + ([1.0, 2.0, 1.0], 1.0, True, [], [2]), + # strict=False: equal value IS a signal + ([1.0, 2.0, 0.5], 1.0, False, [], [1, 2]), + # empty detection function + ([], 1.0, True, [], []), + # single element — signal + ([5.0], 1.0, True, [], [1]), + # single element — no signal + ([0.5], 1.0, True, [], []), + # indices are 1-based + ([0.0, 0.0, 5.0, 0.0, 5.0], 1.0, True, [], [3, 5]), + # 
change_points present but do not affect result + ([2.0, 0.5, 2.0], 1.0, True, [2], [1, 3]), + ], + ids=[ + "all_below", + "all_above_strict", + "strict_true_excludes_equal", + "strict_false_includes_equal", + "empty_df", + "single_signal", + "single_no_signal", + "returns_1based_indices", + "change_points_do_not_affect", + ], + ) + def test_apply( + self, + values: list[float], + threshold: float, + strict: bool, + change_points: list[int], + expected: list[int], + ) -> None: + """ + PointBasedPolicy.apply must return 1-based signal indices. + + Any position where detection_function satisfies the threshold + condition (strict or non-strict) is a signal. change_points + are accepted but ignored. + """ + policy: PointBasedPolicy = PointBasedPolicy(strict=strict) + df: UnivariateNumericArray = make_df(*values) + result: list[int] = policy.apply(df, threshold, change_points) + assert result == expected + + +# --------------------------------------------------------------------------- +# TestEventBasedPolicyInit +# --------------------------------------------------------------------------- + + +class TestEventBasedPolicyInit: + """Tests for EventBasedPolicy constructor.""" + + def test_valid_init_stores_fields(self) -> None: + """ + Constructor must store max_delay, strict_edge, strict_point correctly. + + Default strict_edge=True, strict_point=True. 
+ """ + policy: EventBasedPolicy = EventBasedPolicy(max_delay=5) + assert policy.max_delay == 5 + assert policy.strict_edge is True + assert policy.strict_point is True + + def test_explicit_strict_values_stored(self) -> None: + """Explicit strict_edge=False and strict_point=False must be stored.""" + policy: EventBasedPolicy = EventBasedPolicy( + max_delay=3, + strict_edge=False, + strict_point=False, + ) + assert policy.max_delay == 3 + assert policy.strict_edge is False + assert policy.strict_point is False + + def test_negative_max_delay_raises(self) -> None: + """Negative max_delay must raise ValueError.""" + with pytest.raises(ValueError): + EventBasedPolicy(max_delay=-1) + + def test_zero_max_delay_is_valid(self) -> None: + """max_delay=0 means only the change point itself is in the window.""" + policy: EventBasedPolicy = EventBasedPolicy(max_delay=0) + assert policy.max_delay == 0 + + +# --------------------------------------------------------------------------- +# TestEventBasedPolicyApplyEdgeMode +# --------------------------------------------------------------------------- + + +class TestEventBasedPolicyApplyEdgeMode: + """Tests for edge (rising-edge) detection mode — no delay windows active.""" + + @pytest.mark.parametrize( + "values, threshold, strict_edge, change_points, expected", + [ + # basic rising edge detected + # idx3: prev=0.0<1.0, 2.0>1.0 -> signal + ([0.0, 0.0, 2.0, 2.0], 1.0, True, [], [3]), + # no repeat signal while staying above threshold + # idx2: rising edge -> signal, idx3,4: prev>=threshold -> no signal + ([0.0, 2.0, 3.0, 4.0], 1.0, True, [], [2]), + # falling then rising produces second signal + # idx2: rising [2], idx3: falling, idx4: rising [4] + ([0.0, 2.0, 0.0, 2.0], 1.0, True, [], [2, 4]), + # strict_edge=True: prev=0.5<1.0, curr=1.0, 1.0>1.0 False -> no signal + ([0.5, 1.0, 0.5], 1.0, True, [], []), + # strict_edge=False: prev=0.5<1.0, curr=1.0, 1.0>=1.0 True -> signal + ([0.5, 1.0, 0.5], 1.0, False, [], [2]), + # first
element above threshold: prev=-inf<1.0, 2.0>1.0 -> signal + ([2.0, 0.0, 0.0], 1.0, True, [], [1]), + # first element equal threshold, strict=False: prev=-inf<1.0, 1.0>=1.0 -> signal + ([1.0, 0.0], 1.0, False, [], [1]), + # first element equal threshold, strict=True: 1.0>1.0 False -> no signal + ([1.0, 0.0], 1.0, True, [], []), + # returns 1-based indices + ([0.5, 2.0, 0.5], 1.0, True, [], [2]), + # empty detection function + ([], 1.0, True, [], []), + ], + ids=[ + "basic_rising_edge", + "no_repeat_while_above", + "falling_then_rising", + "strict_edge_true_equal_not_signal", + "strict_edge_false_equal_is_signal", + "first_element_above_is_signal", + "first_element_equal_strict_false", + "first_element_equal_strict_true", + "returns_1based_indices", + "empty_df", + ], + ) + def test_edge_mode( + self, + values: list[float], + threshold: float, + strict_edge: bool, + change_points: list[int], + expected: list[int], + ) -> None: + """ + In edge mode (no delay windows), only rising-edge crossings are signals. + + prev is -inf for the first element. strict_edge controls whether + the crossing condition uses strict (>) or non-strict (>=) inequality + for the current value. prev is always checked with strict (<). 
+ """ + policy: EventBasedPolicy = EventBasedPolicy( + max_delay=0, + strict_edge=strict_edge, + strict_point=True, + ) + df: UnivariateNumericArray = make_df(*values) + result: list[int] = policy.apply(df, threshold, change_points) + assert result == expected + + +# --------------------------------------------------------------------------- +# TestEventBasedPolicyApplyDelayWindow +# --------------------------------------------------------------------------- + + +class TestEventBasedPolicyApplyDelayWindow: + """Tests for point-based mode inside delay windows [true_cp, true_cp + max_delay].""" + + @pytest.mark.parametrize( + "values, threshold, change_points, max_delay, strict_point, expected", + [ + # all above in window — all are signals + # cp=3, max_delay=2 -> window [3,5] (1-based, inclusive) + # idx3=2.0, idx4=2.0, idx5=2.0 -> all signals + ([0.0, 0.0, 2.0, 2.0, 2.0], 1.0, [3], 2, True, [3, 4, 5]), + # partial signals in window + # idx3=2.0 signal, idx4=0.5 no, idx5=2.0 signal + ([0.0, 0.0, 2.0, 0.5, 2.0], 1.0, [3], 2, True, [3, 5]), + # strict_point=True: equal not a signal in window + # window [3,5], idx3=1.0, idx4=1.0: 1.0>1.0 False -> no signals + ([0.0, 0.0, 1.0, 1.0, 0.0], 1.0, [3], 2, True, []), + # strict_point=False: equal IS a signal in window + # window [3,5], idx3=1.0, idx4=1.0: 1.0>=1.0 True -> signals + ([0.0, 0.0, 1.0, 1.0, 0.0], 1.0, [3], 2, False, [3, 4]), + # max_delay=0: window is just [cp, cp] — single point + # cp=3, window={3}, idx3=2.0 -> signal + ([0.0, 0.0, 2.0, 0.0], 1.0, [3], 0, True, [3]), + # right boundary is INCLUSIVE: cp=3, max_delay=2 -> idx5 in window + ([0.0, 0.0, 0.0, 0.0, 2.0], 1.0, [3], 2, True, [5]), + # two change points — two windows + # cp=[2,5], max_delay=1 -> windows [2,3] and [5,6] + # idx2=2.0, idx3=2.0, idx5=2.0, idx6=2.0 -> all signals + ([0.0, 2.0, 2.0, 0.0, 2.0, 2.0], 1.0, [2, 5], 1, True, [2, 3, 5, 6]), + ], + ids=[ + "all_above_in_window", + "partial_signals_in_window", + "strict_point_true_equal_not_signal", 
+ "strict_point_false_equal_is_signal", + "max_delay_zero_single_point", + "right_boundary_inclusive", + "two_change_points_two_windows", + ], + ) + def test_delay_window( + self, + values: list[float], + threshold: float, + change_points: list[int], + max_delay: int, + strict_point: bool, + expected: list[int], + ) -> None: + """ + Inside [true_cp, true_cp + max_delay] policy uses point-based mode. + + strict_point controls whether equal values are signals. + Right boundary is inclusive. change_points are 1-based. + """ + policy: EventBasedPolicy = EventBasedPolicy( + max_delay=max_delay, + strict_edge=True, + strict_point=strict_point, + ) + df: UnivariateNumericArray = make_df(*values) + result: list[int] = policy.apply(df, threshold, change_points) + assert result == expected + + +# --------------------------------------------------------------------------- +# TestEventBasedPolicyApplyMixed +# --------------------------------------------------------------------------- + + +class TestEventBasedPolicyApplyMixed: + """Tests combining edge mode and delay windows in the same series.""" + + @pytest.mark.parametrize( + "values, threshold, change_points, max_delay, strict_edge, strict_point, expected", + [ + # edge signal before window, point-based inside window + # df=[0.0, 2.0, 0.0, 0.0, 2.0, 2.0], cp=[4], max_delay=1 + # window=[4,5] + # idx1: edge, 0.0<=1.0 -> no + # idx2: edge, prev=0.0<1.0, 2.0>1.0 -> signal [2] + # idx3: edge, prev=2.0>=1.0 -> no (not rising) + # idx4: window, 0.0<=1.0 -> no + # idx5: window, 2.0>1.0 -> signal [5] + # idx6: edge, prev=df[4]=2.0>=1.0 (variant A) -> no + ( + [0.0, 2.0, 0.0, 0.0, 2.0, 2.0], + 1.0, + [4], + 1, + True, + True, + [2, 5], + ), + # signal before window is independent of window detection + # df=[0.0, 2.0, 0.0, 2.0], cp=[4], max_delay=1 + # window=[4,4] (df length=4, so only idx4) + # idx2: edge -> signal [2] + # idx4: window, 2.0>1.0 -> signal [4] + ( + [0.0, 2.0, 0.0, 2.0], + 1.0, + [4], + 1, + True, + True, + [2, 4], + 
), + # edge resets correctly after window (variant A: prev=last window value) + # df=[0.0, 0.0, 2.0, 0.0, 0.0, 2.0], cp=[3], max_delay=0 + # window={3} + # idx3: window, 2.0>1.0 -> signal [3] + # idx4: edge, prev=df[2]=2.0>=1.0 -> no (not rising) + # idx5: edge, prev=0.0<1.0, 0.0<=1.0 -> no + # idx6: edge, prev=0.0<1.0, 2.0>1.0 -> signal [6] + ( + [0.0, 0.0, 2.0, 0.0, 0.0, 2.0], + 1.0, + [3], + 0, + True, + True, + [3, 6], + ), + # after window: value stays above threshold — no edge signal (variant A) + # df=[0.0, 0.0, 2.0, 2.0, 2.0], cp=[3], max_delay=1 + # window=[3,4] + # idx3: window, 2.0>1.0 -> signal [3] + # idx4: window, 2.0>1.0 -> signal [4] + # idx5: edge, prev=df[3]=2.0>=1.0 (variant A) -> no signal + ( + [0.0, 0.0, 2.0, 2.0, 2.0], + 1.0, + [3], + 1, + True, + True, + [3, 4], + ), + ], + ids=[ + "edge_before_and_point_inside_window", + "signal_before_window_independent", + "edge_resets_after_window", + "after_window_above_no_edge_signal", + ], + ) + def test_mixed( + self, + values: list[float], + threshold: float, + change_points: list[int], + max_delay: int, + strict_edge: bool, + strict_point: bool, + expected: list[int], + ) -> None: + """ + Edge mode and delay windows must work correctly together. + + prev (for edge detection) tracks the last seen value including + values inside the window (variant A). This means that if the + detection function is above threshold at the end of a window, + the first element after the window will NOT produce an edge signal. 
+ """ + policy: EventBasedPolicy = EventBasedPolicy( + max_delay=max_delay, + strict_edge=strict_edge, + strict_point=strict_point, + ) + df: UnivariateNumericArray = make_df(*values) + result: list[int] = policy.apply(df, threshold, change_points) + assert result == expected From 6803c09ea0fb78f788039852a97f91b845478667 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Mon, 13 Apr 2026 22:30:22 +0300 Subject: [PATCH 08/15] feat: add NoResetDetectionTrace --- .../noreset/noreset_detection_trace.py | 75 +++++++- .../noreset/test_noreset_detection_trace.py | 171 ++++++++++++++++++ 2 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 tests/unit/benchmark/noreset/test_noreset_detection_trace.py diff --git a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py index 3e34f98..acc1e9a 100644 --- a/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py +++ b/pysatl_cpd/benchmark/noreset/noreset_detection_trace.py @@ -1,8 +1,40 @@ +# -*- coding: ascii -*- + +""" +NoReset detection trace container. + +This module provides NoResetDetectionTrace - a lightweight trace produced +by applying a ThresholdPolicy to a pre-computed infinite-threshold trace, +avoiding redundant solver executions. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import cast + +import numpy as np + from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from pysatl_cpd.core.typedefs import UnivariateNumericArray class NoResetDetectionTrace[StateT: OnlineAlgorithmState](OnlineDetectionTrace[StateT]): + """ + Detection trace produced by the NoReset benchmark strategy. 
+ + Instead of re-running the solver for every threshold, a single + infinite-threshold trace is computed once and this class wraps it + with new detected change points obtained by applying a ThresholdPolicy. + + Auxiliary fields (processing_time, algorithm_states, skip_periods, + learning_periods, forced_change_points, signal_change_points) are + intentionally left empty - only detection_function and + detected_change_points carry meaningful data. + """ + @classmethod def from_inf_trace( cls, @@ -10,4 +42,45 @@ def from_inf_trace( detected_change_points: list[int], threshold: float, ) -> "NoResetDetectionTrace[StateT]": - raise NotImplementedError("Method 'from_inf_trace' is not implemented yet.") + """ + Construct a NoResetDetectionTrace from an infinite-threshold trace. + + Copies detection_function, algorithm_name, and configuration_hash + from source_trace. All other fields are set to empty defaults. + + Parameters + ---------- + source_trace : OnlineDetectionTrace[StateT] + The pre-computed trace obtained by running the solver with + threshold=inf. Its detection_function is reused for all + threshold simulations. + detected_change_points : list[int] + Change point indices produced by applying a ThresholdPolicy + to source_trace.detection_function at a specific threshold. + threshold : float + The threshold value used to extract detected_change_points. + + Returns + ------- + NoResetDetectionTrace[StateT] + A new trace with the given change points and copied + detection function. 
+ """ + empty_processing_time: UnivariateNumericArray = cast( + UnivariateNumericArray, + np.array([], dtype=np.float64), + ) + + return cls( + algorithm_name=source_trace.algorithm_name, + configuration_hash=source_trace.configuration_hash, + detected_change_points=detected_change_points, + threshold=threshold, + detection_function=source_trace.detection_function.copy(), + processing_time=empty_processing_time, + algorithm_states=[], + skip_periods=[], + learning_periods=[], + forced_change_points=[], + signal_change_points=[], + ) diff --git a/tests/unit/benchmark/noreset/test_noreset_detection_trace.py b/tests/unit/benchmark/noreset/test_noreset_detection_trace.py new file mode 100644 index 0000000..588ce46 --- /dev/null +++ b/tests/unit/benchmark/noreset/test_noreset_detection_trace.py @@ -0,0 +1,171 @@ +# -*- coding: ascii -*- + +""" +Unit tests for NoResetDetectionTrace[Any]. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import Any + +import pytest + +from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace +from pysatl_cpd.core.detection_trace import DetectionTrace +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace + + +@pytest.fixture +def source_trace() -> MockOnlineDetectionTrace: + """ + Source OnlineDetectionTrace with real detection function values. 
+ """ + trace = MockOnlineDetectionTrace(detected_change_points=[5, 10]) + return trace + + +@pytest.fixture +def new_change_points() -> list[int]: + """New detected change points to assign to NoResetDetectionTrace[Any].""" + return [3, 7] + + +class TestNoResetDetectionTraceFromInfTrace: + """Tests for NoResetDetectionTrace[Any].from_inf_trace factory method.""" + + def test_detected_change_points_and_threshold_are_set( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """New detected_change_points and threshold are stored correctly.""" + threshold: float = 1.0 + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=threshold, + ) + assert list(trace.detected_change_points) == new_change_points + assert trace.threshold == threshold + + def test_algorithm_name_and_configuration_hash_are_copied( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """algorithm_name and configuration_hash are copied from source_trace.""" + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=1.0, + ) + assert trace.algorithm_name == source_trace.algorithm_name + assert trace.configuration_hash == source_trace.configuration_hash + + def test_auxiliary_fields_are_empty( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """processing_time, detection_function, algorithm_states, skip_periods, + learning_periods, forced_change_points, signal_change_points are empty/default.""" + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=1.0, + ) + assert len(trace.processing_time) == 0 + assert len(trace.detection_function) == 0 + assert 
trace.algorithm_states == [] + assert trace.skip_periods == [] + assert trace.learning_periods == [] + assert trace.forced_change_points == [] + assert trace.signal_change_points == [] + + def test_source_trace_is_not_mutated( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """source_trace fields are not modified after from_inf_trace call.""" + original_cps: list[int] = list(source_trace.detected_change_points) + original_name: str = source_trace.algorithm_name + original_hash: int = source_trace.configuration_hash + + NoResetDetectionTrace[Any].from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=1.0, + ) + + assert list(source_trace.detected_change_points) == original_cps + assert source_trace.algorithm_name == original_name + assert source_trace.configuration_hash == original_hash + + def test_with_empty_detected_change_points( + self, + source_trace: MockOnlineDetectionTrace, + ) -> None: + """from_inf_trace works correctly when detected_change_points is empty.""" + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=[], + threshold=1.0, + ) + assert list(trace.detected_change_points) == [] + + def test_with_boundary_threshold_values( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """from_inf_trace works correctly with threshold=0.0 and threshold=inf.""" + trace_zero: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=0.0, + ) + assert trace_zero.threshold == 0.0 + + trace_inf: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=float("inf"), + ) + assert trace_inf.threshold == float("inf") + + +class TestNoResetDetectionTraceInheritance: + 
"""Tests for NoResetDetectionTrace[Any] inheritance chain.""" + + def test_is_instance_of_expected_base_classes( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """NoResetDetectionTrace[Any] is an instance of OnlineDetectionTrace and DetectionTrace.""" + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=1.0, + ) + assert isinstance(trace, NoResetDetectionTrace) + assert isinstance(trace, OnlineDetectionTrace) + assert isinstance(trace, DetectionTrace) + + def test_detected_change_points_accessible_via_base_property( + self, + source_trace: MockOnlineDetectionTrace, + new_change_points: list[int], + ) -> None: + """detected_change_points are accessible through the base class property.""" + trace: NoResetDetectionTrace[Any] = NoResetDetectionTrace.from_inf_trace( + source_trace=source_trace, + detected_change_points=new_change_points, + threshold=1.0, + ) + base: DetectionTrace = trace + assert list(base.detected_change_points) == new_change_points From 24d35beab6fd64efca9ac7eda9822bf1c91eadbc Mon Sep 17 00:00:00 2001 From: iraedeus Date: Mon, 13 Apr 2026 23:07:15 +0300 Subject: [PATCH 09/15] feat: add abstract OnlineBenchmarkRunner --- pysatl_cpd/benchmark/arl_benchmark_runner.py | 5 +- .../benchmark/core/benchmark_executor.py | 12 +- .../noreset/noreset_benchmark_runner.py | 7 +- .../benchmark/online_benchmark_runner.py | 100 +++- .../benchmark/reset_benchmark_runner.py | 7 +- .../mocks/benchmark/mock_benchmark_runner.py | 92 +++ .../benchmark/test_online_benchmark_runner.py | 523 ++++++++++++++++++ 7 files changed, 725 insertions(+), 21 deletions(-) create mode 100644 tests/mocks/benchmark/mock_benchmark_runner.py create mode 100644 tests/unit/benchmark/test_online_benchmark_runner.py diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py index 
8646aa4..d14069f 100644 --- a/pysatl_cpd/benchmark/arl_benchmark_runner.py +++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from pathlib import Path from typing import Any @@ -13,7 +14,7 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledDa ): def __init__( self, - algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], providers: list[ProviderT], solver: OnlineCpdSolver, dump_dir: Path | None = None, @@ -24,6 +25,6 @@ def _collect_runs( self, algorithm: OnlineAlgorithm[Any, Any, Any], threshold: float, - providers: list[ProviderT], + providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: raise NotImplementedError("Method `_collect_runs` is not implemented yet.") diff --git a/pysatl_cpd/benchmark/core/benchmark_executor.py b/pysatl_cpd/benchmark/core/benchmark_executor.py index 15b6ddb..fe7bee0 100644 --- a/pysatl_cpd/benchmark/core/benchmark_executor.py +++ b/pysatl_cpd/benchmark/core/benchmark_executor.py @@ -79,11 +79,11 @@ class BenchmarkExecutor[DataT]: Parameters ---------- - algorithms : list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] - A list of tuples, where each tuple contains an instantiated online + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + A sequence of tuples, where each tuple contains an instantiated online algorithm and a sequence of thresholds to test it against. - providers : list[DataProvider[DataT]] - A list of data providers to be fed into the algorithms. + providers : Sequence[DataProvider[DataT]] + A sequence of data providers to be fed into the algorithms. solver : OnlineCpdSolver The solver instance responsible for iterating over the data providers and running the algorithmic logic. 
@@ -94,8 +94,8 @@ class BenchmarkExecutor[DataT]: def __init__( self, - algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], - providers: list[DataProvider[DataT]], + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: Sequence[DataProvider[DataT]], solver: OnlineCpdSolver, dump_dir: str | Path | None = None, ) -> None: diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py index f1af0d0..1b36eae 100644 --- a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from pathlib import Path from typing import Any @@ -14,8 +15,8 @@ class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]): def __init__( self, - algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], - providers: list[ProviderT], + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]], solver: OnlineCpdSolver, policy: ThresholdPolicy, @@ -27,7 +28,7 @@ def _collect_runs( self, algorithm: OnlineAlgorithm[Any, Any, Any], threshold: float, - providers: list[ProviderT], + providers: Sequence[ProviderT], ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]: raise NotImplementedError("Method '_collect_runs' is not implemented yet.") diff --git a/pysatl_cpd/benchmark/online_benchmark_runner.py b/pysatl_cpd/benchmark/online_benchmark_runner.py index 03391df..b5edc46 100644 --- a/pysatl_cpd/benchmark/online_benchmark_runner.py +++ b/pysatl_cpd/benchmark/online_benchmark_runner.py @@ -1,5 +1,15 @@ -# online_runner.py +# -*- coding: ascii -*- + +""" +Abstract base class for online benchmark runners. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from abc import ABC, abstractmethod +from collections.abc import Sequence from pathlib import Path from typing import Any @@ -11,26 +21,102 @@ class OnlineBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]](ABC): + """ + Abstract base class for online benchmark runners. + + Organises the evaluation loop over algorithms and thresholds, + delegates data collection to subclasses via _collect_runs(), and + applies all registered metrics to each batch of runs. + + Parameters + ---------- + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + Sequence of (algorithm, thresholds) pairs to evaluate. + providers : Sequence[ProviderT] + Sequence of labeled data providers. + metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] + Named metrics to evaluate for each (algorithm, threshold) batch. + solver : OnlineCpdSolver + Solver used to run algorithms against providers. + dump_dir : Path | str | None, optional + Directory for caching results via BenchmarkExecutor. + If None, caching is disabled. Default is None. 
+ """ + def __init__( self, - algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], - providers: list[ProviderT], + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, - dump_dir: Path | None = None, + dump_dir: Path | str | None = None, ) -> None: - return + self._algorithms = algorithms + self._providers = providers + self._metrics = metrics + self._solver = solver + self._dump_dir = Path(dump_dir) if isinstance(dump_dir, str) else dump_dir @abstractmethod def _collect_runs( self, algorithm: OnlineAlgorithm[Any, Any, Any], threshold: float, - providers: list[ProviderT], + providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: + """ + Collect (trace, provider) pairs for a given algorithm and threshold. + + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm to evaluate. + threshold : float + The detection threshold. + providers : Sequence[ProviderT] + Sequence of data providers to run against. + + Returns + ------- + list[tuple[TraceT, ProviderT]] + Batch of (trace, provider) pairs for metric evaluation. + """ + raise NotImplementedError("Method `_collect_runs` is not implemented yet.") def run( self, ) -> dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]]: - raise NotImplementedError("Method `run` is not implemented yet.") + """ + Execute the benchmark over all algorithms and thresholds. + + For each (algorithm, threshold) pair, collects runs via + _collect_runs() and evaluates all registered metrics. + + Returns + ------- + dict[tuple[str, OnlineAlgorithmConfiguration], list[tuple[float, dict[str, Any]]]] + Mapping of (algorithm_name, configuration) to a list of + (threshold, {metric_name: metric_value}) entries, one per threshold. 
+ """ + + results: dict[ + tuple[str, OnlineAlgorithmConfiguration], + list[tuple[float, dict[str, Any]]], + ] = {} + + for algorithm, thresholds in self._algorithms: + key: tuple[str, OnlineAlgorithmConfiguration] = ( + str(algorithm), + algorithm.configuration, + ) + results[key] = [] + + for threshold in thresholds: + runs = self._collect_runs(algorithm, threshold, self._providers) + + metric_values: dict[str, Any] = {name: metric.evaluate(runs) for name, metric in self._metrics.items()} + + results[key].append((threshold, metric_values)) + + return results diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py index 960044b..87491df 100644 --- a/pysatl_cpd/benchmark/reset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py @@ -1,3 +1,4 @@ +from collections.abc import Sequence from pathlib import Path from typing import Any @@ -14,8 +15,8 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: Labeled ): def __init__( self, - algorithms: list[tuple[OnlineAlgorithm[Any, Any, Any], list[float]]], - providers: list[ProviderT], + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, dump_dir: Path | None = None, @@ -26,6 +27,6 @@ def _collect_runs( self, algorithm: OnlineAlgorithm[Any, Any, Any], threshold: float, - providers: list[ProviderT], + providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: raise NotImplementedError("Method `_collect_runs` is not implemented yet.") diff --git a/tests/mocks/benchmark/mock_benchmark_runner.py b/tests/mocks/benchmark/mock_benchmark_runner.py new file mode 100644 index 0000000..f69db6a --- /dev/null +++ b/tests/mocks/benchmark/mock_benchmark_runner.py @@ -0,0 +1,92 @@ +# -*- coding: ascii -*- + +""" +Mock OnlineBenchmarkRunner for testing. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from collections.abc import Sequence +from pathlib import Path +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace + + +class MockBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]( + OnlineBenchmarkRunner[TraceT, ProviderT] +): + """ + Mock implementation of OnlineBenchmarkRunner for testing. + + Records all _collect_runs calls for assertion in tests. + Returns a pre-configured list of runs for each call. + + Parameters + ---------- + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + Sequence of (algorithm, thresholds) pairs. + providers : Sequence[ProviderT] + Sequence of data providers. + metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] + Dictionary of metrics to evaluate. + solver : OnlineCpdSolver + Solver instance. + dump_dir : Path | str | None, optional + Directory for caching results. + runs_to_return : list[tuple[TraceT, ProviderT]] | None, optional + Pre-configured runs returned by _collect_runs. + If None, returns empty list. 
+ """ + + def __init__( + self, + algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], + providers: Sequence[ProviderT], + metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], + solver: OnlineCpdSolver, + dump_dir: Path | str | None = None, + runs_to_return: list[tuple[TraceT, ProviderT]] | None = None, + ) -> None: + super().__init__( + algorithms=algorithms, + providers=providers, + metrics=metrics, + solver=solver, + dump_dir=dump_dir, + ) + self._runs_to_return: list[tuple[TraceT, ProviderT]] = runs_to_return or [] + self.collect_runs_calls: list[tuple[OnlineAlgorithm[Any, Any, Any], float, Sequence[ProviderT]]] = [] + + def _collect_runs( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + threshold: float, + providers: Sequence[ProviderT], + ) -> list[tuple[TraceT, ProviderT]]: + """ + Record the call and return pre-configured runs. + + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm being evaluated. + threshold : float + The detection threshold. + providers : Sequence[ProviderT] + Sequence of data providers. + + Returns + ------- + list[tuple[TraceT, ProviderT]] + Pre-configured runs set at construction time. + """ + self.collect_runs_calls.append((algorithm, threshold, providers)) + return self._runs_to_return diff --git a/tests/unit/benchmark/test_online_benchmark_runner.py b/tests/unit/benchmark/test_online_benchmark_runner.py new file mode 100644 index 0000000..84fe318 --- /dev/null +++ b/tests/unit/benchmark/test_online_benchmark_runner.py @@ -0,0 +1,523 @@ +# -*- coding: ascii -*- + +""" +Unit tests for OnlineBenchmarkRunner. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from collections.abc import Sequence +from pathlib import Path + +import pytest + +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.typedefs import Number +from tests.mocks.algorithms.online import MockOnlineAlgorithm +from tests.mocks.analysis.labeled_data import MockLabeledData +from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.benchmark.mock_benchmark_runner import MockBenchmarkRunner +from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def solver() -> OnlineCpdSolver: + """Default OnlineCpdSolver with no special configuration.""" + return OnlineCpdSolver() + + +@pytest.fixture +def single_algorithm() -> MockOnlineAlgorithm[Number]: + """Single mock algorithm with return_sequence=[0.5].""" + return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5]) + + +@pytest.fixture +def two_algorithms() -> list[MockOnlineAlgorithm[Number]]: + """Two mock algorithms with different configurations.""" + return [ + MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5]), + MockOnlineAlgorithm[Number](name="AlgoB", return_sequence=[1.5]), + ] + + +@pytest.fixture +def single_provider() -> MockLabeledData: + """Single labeled data provider with one change point.""" + return MockLabeledData(change_points=[5], name="Provider1") + + +@pytest.fixture +def two_providers() -> 
list[MockLabeledData]: + """Two labeled data providers.""" + return [ + MockLabeledData(change_points=[5], name="Provider1"), + MockLabeledData(change_points=[10], name="Provider2"), + ] + + +@pytest.fixture +def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]: + """Single mock aggregation metric.""" + return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](base=MockRunMetric(return_values=[1.0])) + + +@pytest.fixture +def two_metrics() -> dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]]: + """Two named mock aggregation metrics.""" + return { + "metric_a": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]( + base=MockRunMetric(return_values=[1.0]) + ), + "metric_b": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]( + base=MockRunMetric(return_values=[2.0]) + ), + } + + +@pytest.fixture +def single_run() -> list[tuple[MockOnlineDetectionTrace, MockLabeledData]]: + """Single pre-configured run for MockBenchmarkRunner.""" + return [ + ( + MockOnlineDetectionTrace(detected_change_points=[5]), + MockLabeledData(change_points=[5], name="Provider1"), + ) + ] + + +def make_runner( + algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + providers: Sequence[MockLabeledData], + metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], + solver: OnlineCpdSolver, + dump_dir: Path | str | None = None, + runs_to_return: list[tuple[MockOnlineDetectionTrace, MockLabeledData]] | None = None, +) -> MockBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]: + """Helper to construct MockBenchmarkRunner with given parameters.""" + return MockBenchmarkRunner( + algorithms=algorithms, + providers=providers, + metrics=metrics, # type: ignore[arg-type] + solver=solver, + dump_dir=dump_dir, + runs_to_return=runs_to_return or [], + ) + + +# --------------------------------------------------------------------------- +# Tests +# 
--------------------------------------------------------------------------- + + +class TestOnlineBenchmarkRunnerInit: + """Tests for OnlineBenchmarkRunner.__init__.""" + + def test_stores_algorithms_providers_metrics_solver( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """All constructor parameters are stored as private attributes.""" + algorithms = [(single_algorithm, [1.0])] + providers = [single_provider] + metrics = {"m": mock_metric} + + runner = make_runner(algorithms, providers, metrics, solver) + + assert runner._algorithms == algorithms + assert runner._providers == providers + assert runner._metrics == metrics + assert runner._solver is solver + + def test_dump_dir_defaults_to_none( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """dump_dir is None when not provided.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + assert runner._dump_dir is None + + def test_dump_dir_as_string_is_converted_to_path( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """dump_dir passed as str is stored as Path.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=str(tmp_path), + ) + assert isinstance(runner._dump_dir, Path) + assert runner._dump_dir == tmp_path + + def test_dump_dir_as_path_is_stored_as_path( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: 
MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """dump_dir passed as Path is stored as Path.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=tmp_path, + ) + assert isinstance(runner._dump_dir, Path) + assert runner._dump_dir == tmp_path + + +class TestOnlineBenchmarkRunnerAbstract: + """Tests for OnlineBenchmarkRunner abstract interface.""" + + def test_cannot_instantiate_directly( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """OnlineBenchmarkRunner cannot be instantiated directly.""" + with pytest.raises(TypeError): + OnlineBenchmarkRunner( # type: ignore[abstract] + algorithms=[(single_algorithm, [1.0])], + providers=[single_provider], + metrics={"m": mock_metric}, + solver=solver, + ) + + def test_subclass_without_collect_runs_cannot_instantiate( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Subclass without _collect_runs implementation cannot be instantiated.""" + + class IncompleteRunner(OnlineBenchmarkRunner): # type: ignore[type-arg] + pass + + with pytest.raises(TypeError): + IncompleteRunner( # type: ignore[abstract] + algorithms=[(single_algorithm, [1.0])], + providers=[single_provider], + metrics={"m": mock_metric}, + solver=solver, + ) + + +class TestOnlineBenchmarkRunnerRunStructure: + """Tests for the structure of run() return value.""" + + def test_run_returns_dict( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: 
OnlineCpdSolver, + ) -> None: + """run() returns a dict.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + assert isinstance(result, dict) + + def test_result_key_is_tuple_of_name_and_configuration( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Keys of result dict are (str, OnlineAlgorithmConfiguration) tuples.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + for key in result: + assert isinstance(key, tuple) + assert len(key) == 2 + assert isinstance(key[0], str) + assert isinstance(key[1], OnlineAlgorithmConfiguration) + + def test_result_value_is_list_of_threshold_metric_tuples( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Values of result dict are list[tuple[float, dict[str, Any]]].""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + for entries in result.values(): + assert isinstance(entries, list) + for threshold, metrics_dict in entries: + assert isinstance(threshold, float) + assert isinstance(metrics_dict, dict) + + def test_one_entry_per_threshold_in_result( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Each threshold produces exactly one entry in the result list.""" + thresholds = [0.5, 1.0, 1.5] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + 
{"m": mock_metric}, + solver, + ) + result = runner.run() + key = (str(single_algorithm), single_algorithm.configuration) + assert len(result[key]) == len(thresholds) + + def test_metric_names_match_input_dict_keys( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + two_metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], + solver: OnlineCpdSolver, + ) -> None: + """Metric names in result match the keys from the metrics dict.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + two_metrics, + solver, + ) + result = runner.run() + for entries in result.values(): + for _, metrics_dict in entries: + assert set(metrics_dict.keys()) == set(two_metrics.keys()) + + +class TestOnlineBenchmarkRunnerRunLogic: + """Tests for the logic of run() execution.""" + + def test_collect_runs_called_once_per_algorithm_threshold_pair( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs is called exactly once per (algorithm, threshold) pair.""" + thresholds = [0.5, 1.0, 1.5] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + runner.run() + assert len(runner.collect_runs_calls) == len(thresholds) + + def test_metric_evaluate_called_once_per_threshold( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """metric.evaluate() is called once per (algorithm, threshold) pair.""" + thresholds = [0.5, 1.0] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + runner.run() + assert len(mock_metric.aggregate_calls) == 
len(thresholds) + + def test_multiple_algorithms_produce_multiple_keys( + self, + two_algorithms: list[MockOnlineAlgorithm[Number]], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Two algorithms produce two distinct keys in result dict.""" + runner = make_runner( + [(algo, [1.0]) for algo in two_algorithms], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + assert len(result) == 2 + + def test_multiple_thresholds_produce_multiple_entries( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Two thresholds produce two entries in the result list for one algorithm.""" + thresholds = [0.5, 1.5] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + key = (str(single_algorithm), single_algorithm.configuration) + assert len(result[key]) == 2 + + def test_multiple_metrics_all_appear_in_result( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + two_metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], + solver: OnlineCpdSolver, + ) -> None: + """All metrics from input dict appear in every result entry.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [single_provider], + two_metrics, + solver, + ) + result = runner.run() + for entries in result.values(): + for _, metrics_dict in entries: + assert "metric_a" in metrics_dict + assert "metric_b" in metrics_dict + + def test_correct_threshold_passed_to_collect_runs( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: 
OnlineCpdSolver, + ) -> None: + """_collect_runs receives exactly the threshold from the input list.""" + thresholds = [0.5, 1.0, 2.0] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + runner.run() + called_thresholds = [call[1] for call in runner.collect_runs_calls] + assert called_thresholds == thresholds + + def test_collect_runs_receives_all_providers( + self, + single_algorithm: MockOnlineAlgorithm[Number], + two_providers: list[MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs receives the full list of providers.""" + runner = make_runner( + [(single_algorithm, [1.0])], + two_providers, + {"m": mock_metric}, + solver, + ) + runner.run() + assert runner.collect_runs_calls[0][2] == two_providers + + def test_empty_providers_produces_empty_batch( + self, + single_algorithm: MockOnlineAlgorithm[Number], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Empty providers list results in metric being called with empty runs.""" + runner = make_runner( + [(single_algorithm, [1.0])], + [], + {"m": mock_metric}, + solver, + ) + runner.run() + assert mock_metric.aggregate_calls[0] == [] + + def test_empty_thresholds_produces_no_entries( + self, + single_algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Empty thresholds list produces empty entries list for the algorithm.""" + runner = make_runner( + [(single_algorithm, [])], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + key = (str(single_algorithm), single_algorithm.configuration) + assert result[key] == [] + + def test_result_preserves_threshold_order( + self, + single_algorithm: 
MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Thresholds in result appear in the same order as in input list.""" + thresholds = [2.0, 0.5, 1.0] + runner = make_runner( + [(single_algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + key = (str(single_algorithm), single_algorithm.configuration) + result_thresholds = [t for t, _ in result[key]] + assert result_thresholds == thresholds From ef318da8183fa8371655a59865cdf338d1dc516c Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 02:23:08 +0300 Subject: [PATCH 10/15] feat: add ResetBenchmarkRunner --- .../benchmark/reset_benchmark_runner.py | 94 +++- .../benchmark/test_reset_benchmark_runner.py | 491 ++++++++++++++++++ 2 files changed, 581 insertions(+), 4 deletions(-) create mode 100644 tests/unit/benchmark/test_reset_benchmark_runner.py diff --git a/pysatl_cpd/benchmark/reset_benchmark_runner.py b/pysatl_cpd/benchmark/reset_benchmark_runner.py index 87491df..47c5bd3 100644 --- a/pysatl_cpd/benchmark/reset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/reset_benchmark_runner.py @@ -1,8 +1,23 @@ +# -*- coding: ascii -*- + +""" +Reset benchmark runner implementation. + +This module provides ResetBenchmarkRunner - a benchmark that runs the +solver normally, resetting the algorithm on every detected change point. +Results are cached via BenchmarkExecutor. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from collections.abc import Sequence from pathlib import Path -from typing import Any +from typing import Any, cast from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.core.benchmark_executor import BenchmarkExecutor from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm @@ -13,15 +28,44 @@ class ResetBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]( OnlineBenchmarkRunner[TraceT, ProviderT] ): + """ + Benchmark runner that uses standard reset behaviour. + + For each (algorithm, threshold) pair, runs the solver over all + providers via BenchmarkExecutor. The algorithm is reset on every + detected change point (standard solver behaviour). Results are + cached to disk when dump_dir is provided. + + Parameters + ---------- + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + Sequence of (algorithm, thresholds) pairs to evaluate. + providers : Sequence[ProviderT] + Labeled data providers to run against. + metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] + Named metrics to evaluate for each (algorithm, threshold) batch. + solver : OnlineCpdSolver + Solver used to run algorithms against providers. + dump_dir : Path | str | None, optional + Directory for caching results via BenchmarkExecutor. + If None, caching is disabled. Default is None. 
+ """ + def __init__( self, algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], providers: Sequence[ProviderT], metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], solver: OnlineCpdSolver, - dump_dir: Path | None = None, + dump_dir: Path | str | None = None, ) -> None: - return + super().__init__( + algorithms=algorithms, + providers=providers, + metrics=metrics, + solver=solver, + dump_dir=dump_dir, + ) def _collect_runs( self, @@ -29,4 +73,46 @@ def _collect_runs( threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: - raise NotImplementedError("Method `_collect_runs` is not implemented yet.") + """ + Collect runs for a given algorithm and threshold via BenchmarkExecutor. + + Creates a BenchmarkExecutor with a single threshold and all providers, + executes it, and pairs each resulting trace with its provider. + + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm to evaluate. + threshold : float + The detection threshold. + providers : Sequence[ProviderT] + Data providers to run against. + + Returns + ------- + list[tuple[TraceT, ProviderT]] + List of (trace, provider) pairs, one per provider. + """ + if not providers: + return [] + + executor: BenchmarkExecutor[Any] = BenchmarkExecutor( + algorithms=[(algorithm, [threshold])], + providers=list(providers), + solver=self._solver, + dump_dir=self._dump_dir, + ) + + records_and_traces = executor.execute() + + # BenchmarkExecutor returns (BenchmarkRecord, OnlineDetectionTrace) pairs. + # We need to pair each trace with the correct provider. + # Executor iterates providers in the same order as input. 
+ provider_by_name: dict[str, ProviderT] = {provider.name: provider for provider in providers} + + runs: list[tuple[TraceT, ProviderT]] = [] + for record, trace in records_and_traces: + provider = provider_by_name[record.data] + runs.append((cast(TraceT, trace), provider)) + + return runs diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py new file mode 100644 index 0000000..6b30443 --- /dev/null +++ b/tests/unit/benchmark/test_reset_benchmark_runner.py @@ -0,0 +1,491 @@ +# -*- coding: ascii -*- + +""" +Unit tests for ResetBenchmarkRunner. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import csv +from collections.abc import Sequence +from pathlib import Path + +import pytest + +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from pysatl_cpd.core.typedefs import Number +from tests.mocks.algorithms.online import MockOnlineAlgorithm +from tests.mocks.analysis.labeled_data import MockLabeledData +from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def solver() -> OnlineCpdSolver: + """Default OnlineCpdSolver with no special configuration.""" + return OnlineCpdSolver() + + +@pytest.fixture +def algorithm() -> MockOnlineAlgorithm[Number]: + """Algorithm that always returns 0.5 
- below threshold 1.0.""" + return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5]) + + +@pytest.fixture +def algorithm_with_signal() -> MockOnlineAlgorithm[Number]: + """Algorithm that always returns 2.0 - above threshold 1.0.""" + return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0]) + + +@pytest.fixture +def providers() -> list[MockLabeledData]: + """Two labeled data providers.""" + return [ + MockLabeledData(change_points=[5], name="Provider1"), + MockLabeledData(change_points=[10], name="Provider2"), + ] + + +@pytest.fixture +def single_provider() -> MockLabeledData: + """Single labeled data provider.""" + return MockLabeledData(change_points=[5], name="Provider1") + + +@pytest.fixture +def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]: + """Standard mock aggregation metric.""" + return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData](base=MockRunMetric(return_values=[1.0])) + + +def make_reset_runner( + algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + providers: Sequence[MockLabeledData], + metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], + solver: OnlineCpdSolver, + dump_dir: Path | str | None = None, +) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]: + """Helper to construct ResetBenchmarkRunner with given parameters.""" + return ResetBenchmarkRunner( + algorithms=algorithms, + providers=providers, + metrics=metrics, # type: ignore[arg-type] + solver=solver, + dump_dir=dump_dir, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestResetBenchmarkRunnerInheritance: + """Tests for ResetBenchmarkRunner inheritance and interface.""" + + def test_is_instance_of_online_benchmark_runner( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: 
MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """ResetBenchmarkRunner is an instance of OnlineBenchmarkRunner.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + assert isinstance(runner, OnlineBenchmarkRunner) + + def test_collect_runs_is_implemented( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs does not raise NotImplementedError.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + try: + runner._collect_runs(algorithm, 1.0, [single_provider]) + except NotImplementedError: + pytest.fail("_collect_runs raised NotImplementedError") + + +class TestResetBenchmarkRunnerCollectRuns: + """Tests for ResetBenchmarkRunner._collect_runs.""" + + def test_returns_one_run_per_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + providers: list[MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs returns exactly len(providers) (trace, provider) pairs.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + providers, + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, providers) + assert len(runs) == len(providers) + + def test_empty_providers_returns_empty_list( + self, + algorithm: MockOnlineAlgorithm[Number], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs with empty providers returns empty list.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [], + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, []) + 
assert runs == [] + + def test_single_provider_returns_single_run( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """_collect_runs with one provider returns exactly one pair.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + assert len(runs) == 1 + + def test_each_run_paired_with_correct_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + providers: list[MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Each trace is paired with its corresponding provider.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + providers, + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, providers) + for (_, provider), expected_provider in zip(runs, providers, strict=False): + assert provider is expected_provider + + def test_trace_is_online_detection_trace( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Each trace in collected runs is an OnlineDetectionTrace.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + for trace, _ in runs: + assert isinstance(trace, OnlineDetectionTrace) + + def test_trace_algorithm_name_and_configuration_hash_match_algorithm( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + 
"""algorithm_name and configuration_hash in trace match the algorithm.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + trace, _ = runs[0] + assert trace.algorithm_name == str(algorithm) + assert trace.configuration_hash == hash(algorithm.configuration) + + def test_detected_change_points_respect_threshold( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """High threshold produces no detections, low threshold produces detections.""" + runner = make_reset_runner( + [(algorithm_with_signal, [float("inf"), 1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + runs_no_signal = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) + runs_with_signal = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + trace_no_signal, _ = runs_no_signal[0] + trace_with_signal, _ = runs_with_signal[0] + assert len(trace_no_signal.detected_change_points) == 0 + assert len(trace_with_signal.detected_change_points) > 0 + + def test_different_thresholds_produce_different_detections( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Lower threshold produces more detections than higher threshold.""" + runner = make_reset_runner( + [(algorithm_with_signal, [1.0, float("inf")])], + [single_provider], + {"m": mock_metric}, + solver, + ) + runs_low = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + runs_high = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) + trace_low, _ = runs_low[0] + trace_high, _ = runs_high[0] + assert 
len(trace_low.detected_change_points) > len(trace_high.detected_change_points) + + def test_algorithm_is_reset_between_providers( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + providers: list[MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Algorithm state is reset between providers by the solver.""" + runner = make_reset_runner( + [(algorithm_with_signal, [1.0])], + providers, + {"m": mock_metric}, + solver, + ) + runs = runner._collect_runs(algorithm_with_signal, 1.0, providers) + # Each provider run starts fresh - detection functions start from 0 + for trace, provider in runs: + assert isinstance(trace, OnlineDetectionTrace) + # detection_function must cover exactly the paired provider's observations + assert ( + len(trace.detection_function) + == len(provider) + # compared per provider; a trailing "or True" used to make this check vacuous + ) # solver resets - no cross-provider state leak + + +class TestResetBenchmarkRunnerCaching: + """Tests for ResetBenchmarkRunner caching behaviour via BenchmarkExecutor.""" + + def test_no_files_created_without_dump_dir( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """Without dump_dir no files are created.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=None, + ) + runner.run() + assert not any(tmp_path.iterdir()) + + def test_results_cached_to_disk_when_dump_dir_provided( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """With dump_dir a registry CSV file is created.""" + runner = make_reset_runner( +
[(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=tmp_path, + ) + runner.run() + registry = tmp_path / "benchmark_registry.csv" + assert registry.exists() + + def test_registry_contains_correct_metadata( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """Registry CSV contains correct algorithm, threshold, data entries.""" + threshold: float = 1.0 + runner = make_reset_runner( + [(algorithm, [threshold])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=tmp_path, + ) + runner.run() + registry = tmp_path / "benchmark_registry.csv" + with open(registry, encoding="utf-8") as f: + rows = list(csv.DictReader(f)) + assert len(rows) == 1 + assert rows[0]["algorithm"] == str(algorithm) + assert float(rows[0]["threshold"]) == threshold + assert rows[0]["data"] == single_provider.name + + def test_cached_results_reused_on_second_run( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + tmp_path: Path, + ) -> None: + """Second run() with same dump_dir reuses cached traces.""" + runner_first = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=tmp_path, + ) + runner_first.run() + assert (tmp_path / "benchmark_registry.csv").exists() + + runner_second = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + dump_dir=tmp_path, + ) + runner_second.run() + # Registry is rewritten but pickle files should not be recreated + pkl_files = list(tmp_path.glob("*.pkl")) + assert len(pkl_files) == 1 + + +class TestResetBenchmarkRunnerRun: + """Integration tests for ResetBenchmarkRunner.run().""" + + def
test_run_with_single_algorithm_single_threshold_single_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Basic happy path - one algorithm, one threshold, one provider.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + assert len(result) == 1 + entries = next(iter(result.values())) + assert len(entries) == 1 + threshold, metrics_dict = entries[0] + assert threshold == 1.0 + assert "m" in metrics_dict + + def test_run_returns_correct_structure( + self, + algorithm: MockOnlineAlgorithm[Number], + providers: list[MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """run() result has correct nested structure.""" + thresholds = [0.5, 1.0] + runner = make_reset_runner( + [(algorithm, thresholds)], + providers, + {"m": mock_metric}, + solver, + ) + result = runner.run() + for key, entries in result.items(): + assert isinstance(key[0], str) + assert len(entries) == len(thresholds) + for t, md in entries: + assert isinstance(t, float) + assert isinstance(md, dict) + + def test_run_with_multiple_thresholds( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledData, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Multiple thresholds produce multiple entries in result.""" + thresholds = [0.5, 1.0, 2.0] + runner = make_reset_runner( + [(algorithm, thresholds)], + [single_provider], + {"m": mock_metric}, + solver, + ) + result = runner.run() + entries = next(iter(result.values())) + assert len(entries) == len(thresholds) + result_thresholds = [t for t, _ in entries] + assert result_thresholds == thresholds + + def 
test_run_with_empty_providers( + self, + algorithm: MockOnlineAlgorithm[Number], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + solver: OnlineCpdSolver, + ) -> None: + """Empty providers list - metric is called with empty batch.""" + runner = make_reset_runner( + [(algorithm, [1.0])], + [], + {"m": mock_metric}, + solver, + ) + runner.run() + assert mock_metric.aggregate_calls[0] == [] From f82c0ba4bbc828c76a68097ceaa2054848b8c542 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 02:59:32 +0300 Subject: [PATCH 11/15] feat: add NoResetBenchmarkRunner --- .../noreset/noreset_benchmark_runner.py | 136 ++++- tests/mocks/algorithms/online/error.py | 2 - tests/mocks/algorithms/online/simple.py | 1 - tests/mocks/analysis/labeled_data.py | 29 + .../noreset/test_noreset_benchmark_runner.py | 536 ++++++++++++++++++ .../benchmark/test_reset_benchmark_runner.py | 100 ++-- 6 files changed, 743 insertions(+), 61 deletions(-) create mode 100644 tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py diff --git a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py index 1b36eae..52baab3 100644 --- a/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py +++ b/pysatl_cpd/benchmark/noreset/noreset_benchmark_runner.py @@ -1,8 +1,24 @@ +# -*- coding: ascii -*- + +""" +NoReset benchmark runner implementation. + +This module provides NoResetBenchmarkRunner - an optimised benchmark for +series with a single change point. The solver is executed only once per +(algorithm, provider) pair with threshold=inf, and all threshold +evaluations are simulated via ThresholdPolicy on the cached trace. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from collections.abc import Sequence from pathlib import Path from typing import Any from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.core.benchmark_executor import BenchmarkExecutor from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace from pysatl_cpd.benchmark.noreset.threshold_policy import ThresholdPolicy @@ -13,6 +29,33 @@ class NoResetBenchmarkRunner[ProviderT: LabeledData[Any]](OnlineBenchmarkRunner[NoResetDetectionTrace[Any], ProviderT]): + """ + Optimised benchmark runner for series with a single change point. + + For each (algorithm, provider) pair the solver is executed exactly + once with threshold=inf, producing a full detection function trace. + All threshold evaluations are then simulated by applying a + ThresholdPolicy to that cached trace, avoiding redundant solver runs. + Caching is handled entirely by BenchmarkExecutor. + + Parameters + ---------- + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + Sequence of (algorithm, thresholds) pairs to evaluate. + providers : Sequence[ProviderT] + Labeled data providers to run against. + metrics : dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]] + Named metrics to evaluate for each (algorithm, threshold) batch. + solver : OnlineCpdSolver + Solver used to produce inf traces. + policy : ThresholdPolicy + Policy used to extract detected change points from the inf trace + for each threshold. + dump_dir : Path | str | None, optional + Directory for caching inf traces via BenchmarkExecutor. + If None, caching is disabled. Default is None. 
+ """ + def __init__( self, algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], @@ -20,9 +63,48 @@ def __init__( metrics: dict[str, MultipleRunMetric[NoResetDetectionTrace[Any], ProviderT, Any]], solver: OnlineCpdSolver, policy: ThresholdPolicy, - dump_dir: Path | None = None, + dump_dir: Path | str | None = None, ) -> None: - return + super().__init__( + algorithms=algorithms, + providers=providers, + metrics=metrics, + solver=solver, + dump_dir=dump_dir, + ) + self._policy = policy + + def _get_inf_trace( + self, + algorithm: OnlineAlgorithm[Any, Any, Any], + provider: ProviderT, + ) -> OnlineDetectionTrace[Any]: + """ + Compute or retrieve the infinite-threshold trace for a given pair. + + Delegates entirely to BenchmarkExecutor which handles disk caching + when dump_dir is set. + + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm to run. + provider : ProviderT + The data provider to run against. + + Returns + ------- + OnlineDetectionTrace[Any] + Trace produced with threshold=inf. + """ + executor: BenchmarkExecutor[Any] = BenchmarkExecutor( + algorithms=[(algorithm, [float("inf")])], + providers=[provider], + solver=self._solver, + dump_dir=self._dump_dir, + ) + _, inf_trace = executor.execute()[0] + return inf_trace def _collect_runs( self, @@ -30,11 +112,47 @@ def _collect_runs( threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[NoResetDetectionTrace[Any], ProviderT]]: - raise NotImplementedError("Method '_collect_runs' is not implemented yet.") + """ + Collect NoReset runs for a given algorithm and threshold. - def _get_inf_trace( - self, - algorithm: OnlineAlgorithm[Any, Any, Any], - provider: ProviderT, - ) -> OnlineDetectionTrace[Any]: - raise NotImplementedError("Method '_get_inf_trace' is not implemented yet.") + For each provider, retrieves the inf trace via BenchmarkExecutor + and applies the ThresholdPolicy to produce a lightweight + NoResetDetectionTrace. 
+ + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm to evaluate. + threshold : float + The detection threshold to simulate. + providers : Sequence[ProviderT] + Data providers to run against. + + Returns + ------- + list[tuple[NoResetDetectionTrace[Any], ProviderT]] + List of (noreset_trace, provider) pairs, one per provider. + """ + if not providers: + return [] + + runs: list[tuple[NoResetDetectionTrace[Any], ProviderT]] = [] + + for provider in providers: + inf_trace = self._get_inf_trace(algorithm, provider) + + detected_change_points: list[int] = self._policy.apply( + inf_trace.detection_function, + threshold, + provider.change_points, + ) + + noreset_trace = NoResetDetectionTrace.from_inf_trace( + source_trace=inf_trace, + detected_change_points=detected_change_points, + threshold=threshold, + ) + + runs.append((noreset_trace, provider)) + + return runs diff --git a/tests/mocks/algorithms/online/error.py b/tests/mocks/algorithms/online/error.py index 09d7f6a..9dac6a8 100644 --- a/tests/mocks/algorithms/online/error.py +++ b/tests/mocks/algorithms/online/error.py @@ -168,7 +168,5 @@ def __repr__(self) -> str: return ( f"{self.__class__.__name__}(" f"name={self._name!r}, " - f"error_on_call={self._error_on_call}, " - f"learning_period_size={self._config.learning_period_size}, " - f"process_count={self._process_count})" + f"learning_period_size={self._config.learning_period_size})" ) diff --git a/tests/mocks/algorithms/online/simple.py b/tests/mocks/algorithms/online/simple.py index b3f432d..1b0b7d3 100644 --- a/tests/mocks/algorithms/online/simple.py +++ b/tests/mocks/algorithms/online/simple.py @@ -155,5 +155,4 @@ def __repr__(self) -> str: f"{self.__class__.__name__}(" f"name={self._name!r}, " - f"learning_period_size={self._config.learning_period_size}, " - f"process_count={self._process_count})" + f"learning_period_size={self._config.learning_period_size})" ) diff --git a/tests/mocks/analysis/labeled_data.py b/tests/mocks/analysis/labeled_data.py index 13eae01..012e787 100644 --- a/tests/mocks/analysis/labeled_data.py +++
b/tests/mocks/analysis/labeled_data.py @@ -35,3 +35,32 @@ def __init__(self, change_points: Sequence[int], name: str = "MockLabeledData"): max_idx = max(change_points) if change_points else 0 dummy_raw_data = [0.0] * max_idx super().__init__(raw_data=dummy_raw_data, change_points=change_points, name=name) + + +class MockLabeledDataWithPadding(LabeledData[float]): + """ + Mock LabeledData where raw data length exceeds the maximum change point index. + + Unlike MockLabeledData (where len == max_cp), this mock adds padding so + that the last observation index is not a change point. This prevents + algorithms from producing detections at index 0 due to insufficient data. + + Parameters + ---------- + change_points : Sequence[int] + Known change point indices (1-based, must be positive). + padding : int, default=10 + Number of extra observations to append after the last change point. + name : str, default="MockLabeledDataWithPadding" + Dataset identifier. + """ + + def __init__( + self, + change_points: Sequence[int], + padding: int = 10, + name: str = "MockLabeledDataWithPadding", + ) -> None: + max_idx = max(change_points) if change_points else 0 + dummy_raw_data = [0.0] * (max_idx + padding) + super().__init__(raw_data=dummy_raw_data, change_points=change_points, name=name) diff --git a/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py new file mode 100644 index 0000000..461b146 --- /dev/null +++ b/tests/unit/benchmark/noreset/test_noreset_benchmark_runner.py @@ -0,0 +1,536 @@ +# -*- coding: ascii -*- + +""" +Unit tests for NoResetBenchmarkRunner. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from collections.abc import Sequence +from pathlib import Path + +import pytest + +from pysatl_cpd.benchmark.noreset.noreset_benchmark_runner import NoResetBenchmarkRunner +from pysatl_cpd.benchmark.noreset.noreset_detection_trace import NoResetDetectionTrace +from pysatl_cpd.benchmark.noreset.threshold_policy import EventBasedPolicy, PointBasedPolicy +from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.typedefs import Number +from tests.mocks.algorithms.online import MockOnlineAlgorithm +from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding +from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def solver() -> OnlineCpdSolver: + """Default OnlineCpdSolver with no special configuration.""" + return OnlineCpdSolver() + + +@pytest.fixture +def algorithm() -> MockOnlineAlgorithm[Number]: + """Algorithm that always returns 0.5 - below threshold 1.0.""" + return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5], learning_period_size=2) + + +@pytest.fixture +def algorithm_with_signal() -> MockOnlineAlgorithm[Number]: + """Algorithm that always returns 2.0 - above threshold 1.0.""" + return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0], learning_period_size=2) + + +@pytest.fixture +def single_provider() -> MockLabeledDataWithPadding: + """Single labeled data provider with one change 
point.""" + return MockLabeledDataWithPadding(change_points=[5], name="Provider1") + + +@pytest.fixture +def two_providers() -> list[MockLabeledDataWithPadding]: + """Two labeled data providers.""" + return [ + MockLabeledDataWithPadding(change_points=[5], name="Provider1"), + MockLabeledDataWithPadding(change_points=[10], name="Provider2"), + ] + + +@pytest.fixture +def point_policy() -> PointBasedPolicy: + """PointBasedPolicy with strict=True.""" + return PointBasedPolicy(strict=True) + + +@pytest.fixture +def event_policy() -> EventBasedPolicy: + """EventBasedPolicy with max_delay=5.""" + return EventBasedPolicy(max_delay=5) + + +@pytest.fixture +def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]: + """Standard mock aggregation metric.""" + return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]( + base=MockRunMetric(return_values=[1.0]) + ) + + +def make_noreset_runner( + algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], + providers: Sequence[MockLabeledDataWithPadding], + metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]], + solver: OnlineCpdSolver, + policy: PointBasedPolicy | EventBasedPolicy, + dump_dir: Path | str | None = None, +) -> NoResetBenchmarkRunner[MockLabeledDataWithPadding]: + """Helper to construct NoResetBenchmarkRunner with given parameters.""" + return NoResetBenchmarkRunner( + algorithms=algorithms, + providers=providers, + metrics=metrics, # type: ignore[arg-type] + solver=solver, + policy=policy, + dump_dir=dump_dir, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestNoResetBenchmarkRunnerInheritance: + """Tests for NoResetBenchmarkRunner inheritance and interface.""" + + def test_is_instance_of_online_benchmark_runner( + self, + algorithm: 
MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """NoResetBenchmarkRunner is an instance of OnlineBenchmarkRunner.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + assert isinstance(runner, OnlineBenchmarkRunner) + + def test_collect_runs_is_implemented( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """_collect_runs does not raise NotImplementedError.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + try: + runner._collect_runs(algorithm, 1.0, [single_provider]) + except NotImplementedError: + pytest.fail("_collect_runs raised NotImplementedError") + + +class TestNoResetBenchmarkRunnerInfTrace: + """Tests for NoResetBenchmarkRunner._get_inf_trace.""" + + def test_inf_trace_has_no_detected_change_points( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Inf trace produced with threshold=inf has no detected change points.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + inf_trace = runner._get_inf_trace(algorithm, single_provider) + assert len(inf_trace.detected_change_points) == 0 + + def test_inf_trace_detection_function_has_correct_length( + self, + algorithm: MockOnlineAlgorithm[Number], + 
single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Detection function length equals the number of observations in provider.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + inf_trace = runner._get_inf_trace(algorithm, single_provider) + assert len(inf_trace.detection_function) == len(single_provider) + + def test_inf_trace_algorithm_name_matches( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """algorithm_name in inf trace matches str(algorithm).""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + inf_trace = runner._get_inf_trace(algorithm, single_provider) + assert inf_trace.algorithm_name == str(algorithm) + + +class TestNoResetBenchmarkRunnerCollectRuns: + """Tests for NoResetBenchmarkRunner._collect_runs.""" + + def test_returns_one_run_per_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + two_providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """_collect_runs returns exactly len(providers) (trace, provider) pairs.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + two_providers, + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm, 1.0, two_providers) + assert len(runs) == len(two_providers) + + def test_empty_providers_returns_empty_list( + self, + algorithm: MockOnlineAlgorithm[Number], + 
mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """_collect_runs with empty providers returns empty list.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [], + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm, 1.0, []) + assert runs == [] + + def test_each_run_is_noreset_detection_trace( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Each trace in collected runs is a NoResetDetectionTrace.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm, 1.0, [single_provider]) + for trace, _ in runs: + assert isinstance(trace, NoResetDetectionTrace) + + def test_each_run_paired_with_correct_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + two_providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Each trace is paired with its corresponding provider.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + two_providers, + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm, 1.0, two_providers) + for (_, provider), expected in zip(runs, two_providers, strict=False): + assert provider is expected + + def test_high_threshold_produces_no_detections( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: 
OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """High threshold (inf) produces no detected change points.""" + runner = make_noreset_runner( + [(algorithm_with_signal, [float("inf")])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm_with_signal, float("inf"), [single_provider]) + trace, _ = runs[0] + assert len(trace.detected_change_points) == 0 + + def test_low_threshold_produces_detections( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Low threshold (0.0) with signal algorithm produces detections.""" + runner = make_noreset_runner( + [(algorithm_with_signal, [0.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + runs = runner._collect_runs(algorithm_with_signal, 0.0, [single_provider]) + trace, _ = runs[0] + assert len(trace.detected_change_points) > 0 + + def test_policy_is_applied_to_inf_trace( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Detected change points match what policy.apply() would return.""" + runner = make_noreset_runner( + [(algorithm_with_signal, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + inf_trace = runner._get_inf_trace(algorithm_with_signal, single_provider) + expected_cps = point_policy.apply( + inf_trace.detection_function, + 1.0, + single_provider.change_points, + ) + runs = runner._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + trace, _ = runs[0] + assert list(trace.detected_change_points) == expected_cps + + 
+class TestNoResetBenchmarkRunnerRun: + """Integration tests for NoResetBenchmarkRunner.run().""" + + def test_run_with_single_algorithm_single_threshold_single_provider( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Basic happy path - one algorithm, one threshold, one provider.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + result = runner.run() + assert len(result) == 1 + entries = next(iter(result.values())) + assert len(entries) == 1 + threshold, metrics_dict = entries[0] + assert threshold == 1.0 + assert "m" in metrics_dict + + def test_run_with_multiple_thresholds_single_solver_execution( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + tmp_path: Path, + ) -> None: + """Multiple thresholds - solver runs only once per provider.""" + runner = make_noreset_runner( + [(algorithm_with_signal, [0.5, 1.0, 2.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + dump_dir=tmp_path, + ) + runner.run() + pkl_files = list(tmp_path.glob("*.pkl")) + assert len(pkl_files) == 1 + + def test_run_returns_correct_structure( + self, + algorithm: MockOnlineAlgorithm[Number], + two_providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """run() result has correct nested structure.""" + thresholds = [0.5, 1.0] + runner = make_noreset_runner( + [(algorithm, thresholds)], + two_providers, + 
{"m": mock_metric}, + solver, + point_policy, + ) + result = runner.run() + for key, entries in result.items(): + assert isinstance(key[0], str) + assert len(entries) == len(thresholds) + for t, md in entries: + assert isinstance(t, float) + assert isinstance(md, dict) + + def test_run_with_empty_providers( + self, + algorithm: MockOnlineAlgorithm[Number], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + ) -> None: + """Empty providers list - metric is called with empty batch.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [], + {"m": mock_metric}, + solver, + point_policy, + ) + runner.run() + assert mock_metric.aggregate_calls[0] == [] + + def test_different_policies_produce_different_detections( + self, + algorithm_with_signal: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + event_policy: EventBasedPolicy, + ) -> None: + """PointBasedPolicy and EventBasedPolicy may produce different detections.""" + runner_point = make_noreset_runner( + [(algorithm_with_signal, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + ) + runner_event = make_noreset_runner( + [(algorithm_with_signal, [1.0])], + [single_provider], + { + "m": MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]( + base=MockRunMetric(return_values=[1.0]) + ) + }, + solver, + event_policy, + ) + runs_point = runner_point._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + runs_event = runner_event._collect_runs(algorithm_with_signal, 1.0, [single_provider]) + trace_point, _ = runs_point[0] + trace_event, _ = runs_event[0] + # Results may differ - we just verify both are valid NoResetDetectionTrace + assert isinstance(trace_point, 
NoResetDetectionTrace) + assert isinstance(trace_event, NoResetDetectionTrace) + + +class TestNoResetBenchmarkRunnerCaching: + """Tests for NoResetBenchmarkRunner caching behaviour.""" + + def test_no_files_created_without_dump_dir( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + tmp_path: Path, + ) -> None: + """Without dump_dir no files are created.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + dump_dir=None, + ) + runner.run() + assert not any(tmp_path.iterdir()) + + def test_inf_trace_cached_to_disk_when_dump_dir_provided( + self, + algorithm: MockOnlineAlgorithm[Number], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], + solver: OnlineCpdSolver, + point_policy: PointBasedPolicy, + tmp_path: Path, + ) -> None: + """With dump_dir, inf trace registry and pickle are created.""" + runner = make_noreset_runner( + [(algorithm, [1.0])], + [single_provider], + {"m": mock_metric}, + solver, + point_policy, + dump_dir=tmp_path, + ) + runner.run() + registry = tmp_path / "benchmark_registry.csv" + pkl_files = list(tmp_path.glob("*.pkl")) + assert registry.exists() + assert len(pkl_files) == 1 diff --git a/tests/unit/benchmark/test_reset_benchmark_runner.py b/tests/unit/benchmark/test_reset_benchmark_runner.py index 6b30443..7cd6a61 100644 --- a/tests/unit/benchmark/test_reset_benchmark_runner.py +++ b/tests/unit/benchmark/test_reset_benchmark_runner.py @@ -20,7 +20,7 @@ from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace from pysatl_cpd.core.typedefs import Number from tests.mocks.algorithms.online import MockOnlineAlgorithm -from tests.mocks.analysis.labeled_data import 
MockLabeledData +from tests.mocks.analysis.labeled_data import MockLabeledDataWithPadding from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace @@ -39,43 +39,45 @@ def solver() -> OnlineCpdSolver: @pytest.fixture def algorithm() -> MockOnlineAlgorithm[Number]: """Algorithm that always returns 0.5 - below threshold 1.0.""" - return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5]) + return MockOnlineAlgorithm[Number](name="AlgoA", return_sequence=[0.5], learning_period_size=2) @pytest.fixture def algorithm_with_signal() -> MockOnlineAlgorithm[Number]: """Algorithm that always returns 2.0 - above threshold 1.0.""" - return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0]) + return MockOnlineAlgorithm[Number](name="AlgoSignal", return_sequence=[2.0], learning_period_size=2) @pytest.fixture -def providers() -> list[MockLabeledData]: +def providers() -> list[MockLabeledDataWithPadding]: """Two labeled data providers.""" return [ - MockLabeledData(change_points=[5], name="Provider1"), - MockLabeledData(change_points=[10], name="Provider2"), + MockLabeledDataWithPadding(change_points=[5], name="Provider1"), + MockLabeledDataWithPadding(change_points=[10], name="Provider2"), ] @pytest.fixture -def single_provider() -> MockLabeledData: +def single_provider() -> MockLabeledDataWithPadding: """Single labeled data provider.""" - return MockLabeledData(change_points=[5], name="Provider1") + return MockLabeledDataWithPadding(change_points=[5], name="Provider1") @pytest.fixture -def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]: +def mock_metric() -> MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]: """Standard mock aggregation metric.""" - return MockAggregationMetric[MockOnlineDetectionTrace, 
MockLabeledData](base=MockRunMetric(return_values=[1.0])) + return MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]( + base=MockRunMetric(return_values=[1.0]) + ) def make_reset_runner( algorithms: Sequence[tuple[MockOnlineAlgorithm[Number], Sequence[float]]], - providers: Sequence[MockLabeledData], - metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData]], + providers: Sequence[MockLabeledDataWithPadding], + metrics: dict[str, MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding]], solver: OnlineCpdSolver, dump_dir: Path | str | None = None, -) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledData]: +) -> ResetBenchmarkRunner[MockOnlineDetectionTrace, MockLabeledDataWithPadding]: """Helper to construct ResetBenchmarkRunner with given parameters.""" return ResetBenchmarkRunner( algorithms=algorithms, @@ -97,8 +99,8 @@ class TestResetBenchmarkRunnerInheritance: def test_is_instance_of_online_benchmark_runner( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """ResetBenchmarkRunner is an instance of OnlineBenchmarkRunner.""" @@ -113,8 +115,8 @@ def test_is_instance_of_online_benchmark_runner( def test_collect_runs_is_implemented( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """_collect_runs does not raise NotImplementedError.""" @@ -136,8 +138,8 @@ class TestResetBenchmarkRunnerCollectRuns: def 
test_returns_one_run_per_provider( self, algorithm: MockOnlineAlgorithm[Number], - providers: list[MockLabeledData], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """_collect_runs returns exactly len(providers) (trace, provider) pairs.""" @@ -153,7 +155,7 @@ def test_returns_one_run_per_provider( def test_empty_providers_returns_empty_list( self, algorithm: MockOnlineAlgorithm[Number], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """_collect_runs with empty providers returns empty list.""" @@ -169,8 +171,8 @@ def test_empty_providers_returns_empty_list( def test_single_provider_returns_single_run( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """_collect_runs with one provider returns exactly one pair.""" @@ -186,8 +188,8 @@ def test_single_provider_returns_single_run( def test_each_run_paired_with_correct_provider( self, algorithm: MockOnlineAlgorithm[Number], - providers: list[MockLabeledData], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """Each trace is paired with its corresponding provider.""" @@ -204,8 +206,8 @@ def test_each_run_paired_with_correct_provider( def 
test_trace_is_online_detection_trace( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """Each trace in collected runs is an OnlineDetectionTrace.""" @@ -222,8 +224,8 @@ def test_trace_is_online_detection_trace( def test_trace_algorithm_name_and_configuration_hash_match_algorithm( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """algorithm_name and configuration_hash in trace match the algorithm.""" @@ -241,8 +243,8 @@ def test_trace_algorithm_name_and_configuration_hash_match_algorithm( def test_detected_change_points_respect_threshold( self, algorithm_with_signal: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """High threshold produces no detections, low threshold produces detections.""" @@ -262,8 +264,8 @@ def test_detected_change_points_respect_threshold( def test_different_thresholds_produce_different_detections( self, algorithm_with_signal: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], 
solver: OnlineCpdSolver, ) -> None: """Lower threshold produces more detections than higher threshold.""" @@ -282,8 +284,8 @@ def test_different_thresholds_produce_different_detections( def test_algorithm_is_reset_between_providers( self, algorithm_with_signal: MockOnlineAlgorithm[Number], - providers: list[MockLabeledData], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """Algorithm state is reset between providers by the solver.""" @@ -311,8 +313,8 @@ class TestResetBenchmarkRunnerCaching: def test_no_files_created_without_dump_dir( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, tmp_path: Path, ) -> None: @@ -330,8 +332,8 @@ def test_no_files_created_without_dump_dir( def test_results_cached_to_disk_when_dump_dir_provided( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, tmp_path: Path, ) -> None: @@ -350,8 +352,8 @@ def test_results_cached_to_disk_when_dump_dir_provided( def test_registry_contains_correct_metadata( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, 
MockLabeledDataWithPadding], solver: OnlineCpdSolver, tmp_path: Path, ) -> None: @@ -376,8 +378,8 @@ def test_registry_contains_correct_metadata( def test_cached_results_reused_on_second_run( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, tmp_path: Path, ) -> None: @@ -411,8 +413,8 @@ class TestResetBenchmarkRunnerRun: def test_run_with_single_algorithm_single_threshold_single_provider( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """Basic happy path - one algorithm, one threshold, one provider.""" @@ -433,8 +435,8 @@ def test_run_with_single_algorithm_single_threshold_single_provider( def test_run_returns_correct_structure( self, algorithm: MockOnlineAlgorithm[Number], - providers: list[MockLabeledData], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + providers: list[MockLabeledDataWithPadding], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """run() result has correct nested structure.""" @@ -456,8 +458,8 @@ def test_run_returns_correct_structure( def test_run_with_multiple_thresholds( self, algorithm: MockOnlineAlgorithm[Number], - single_provider: MockLabeledData, - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + single_provider: MockLabeledDataWithPadding, + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], 
solver: OnlineCpdSolver, ) -> None: """Multiple thresholds produce multiple entries in result.""" @@ -477,7 +479,7 @@ def test_run_with_multiple_thresholds( def test_run_with_empty_providers( self, algorithm: MockOnlineAlgorithm[Number], - mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledData], + mock_metric: MockAggregationMetric[MockOnlineDetectionTrace, MockLabeledDataWithPadding], solver: OnlineCpdSolver, ) -> None: """Empty providers list - metric is called with empty batch.""" From 6b42de16b6fd03527e61cb01412a21fd281c171c Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 03:53:13 +0300 Subject: [PATCH 12/15] fix: add mode argument to ARLBenchmarkRunner --- pysatl_cpd/benchmark/arl_benchmark_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py index d14069f..64c800b 100644 --- a/pysatl_cpd/benchmark/arl_benchmark_runner.py +++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py @@ -1,6 +1,6 @@ from collections.abc import Sequence from pathlib import Path -from typing import Any +from typing import Any, Literal from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner @@ -17,6 +17,7 @@ def __init__( algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], providers: list[ProviderT], solver: OnlineCpdSolver, + mode: Literal["reset", "noreset"], dump_dir: Path | None = None, ) -> None: return From cea88f6d4ee2d4c67da1a29fc84a796736b72abc Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 04:34:43 +0300 Subject: [PATCH 13/15] feat: add ARLBenchmarkRunner --- pysatl_cpd/benchmark/arl_benchmark_runner.py | 117 ++- .../benchmark/test_arl_benchmark_runner.py | 778 ++++++++++++++++++ 2 files changed, 892 insertions(+), 3 deletions(-) create mode 100644 tests/unit/benchmark/test_arl_benchmark_runner.py diff --git 
a/pysatl_cpd/benchmark/arl_benchmark_runner.py b/pysatl_cpd/benchmark/arl_benchmark_runner.py index 64c800b..f3bdeac 100644 --- a/pysatl_cpd/benchmark/arl_benchmark_runner.py +++ b/pysatl_cpd/benchmark/arl_benchmark_runner.py @@ -1,9 +1,27 @@ +# -*- coding: ascii -*- + +""" +Average Run Length (ARL) benchmark runner. + +This module provides the ARLBenchmarkRunner which evaluates the distance +between consecutive false alarms. It automatically applies the ARLMetric +and ensures that the provided datasets do not contain any true change points. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from collections.abc import Sequence from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, cast from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.metrics.online.arl_metric import ARLMetric +from pysatl_cpd.benchmark.noreset.noreset_benchmark_runner import NoResetBenchmarkRunner +from pysatl_cpd.benchmark.noreset.threshold_policy import PointBasedPolicy from pysatl_cpd.benchmark.online_benchmark_runner import OnlineBenchmarkRunner +from pysatl_cpd.benchmark.reset_benchmark_runner import ResetBenchmarkRunner from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithm from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace @@ -12,6 +30,40 @@ class ARLBenchmarkRunner[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]( OnlineBenchmarkRunner[TraceT, ProviderT] ): + """ + Benchmark runner specialized for Average Run Length (ARL) evaluation. + + ARL represents the mean distance between consecutive detections (false alarms) + when no true change points are present in the data. This runner strictly + validates that all providers have empty `change_points`. 
+ + It supports two modes: + - "reset": The algorithm state is reset after every detection (standard behavior). + - "noreset": The algorithm state is not reset. A single infinite-threshold run + is cached, and signals are extracted using a strict point-based policy. + + Parameters + ---------- + algorithms : Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]] + Sequence of (algorithm, thresholds) pairs to evaluate. + providers : list[ProviderT] + Labeled data providers to run against. Must have `change_points == []`. + solver : OnlineCpdSolver + Solver used to run algorithms against providers. + mode : Literal["reset", "noreset"] + Evaluation mode determining whether the algorithm resets after a detection. + dump_dir : Path | None, optional + Directory for caching results via BenchmarkExecutor. + If None, caching is disabled. Default is None. + + Raises + ------ + ValueError + If any provider contains non-empty `change_points`. + ValueError + If `mode` is neither "reset" nor "noreset". + """ + def __init__( self, algorithms: Sequence[tuple[OnlineAlgorithm[Any, Any, Any], Sequence[float]]], @@ -20,7 +72,45 @@ def __init__( mode: Literal["reset", "noreset"], dump_dir: Path | None = None, ) -> None: - return + for provider in providers: + if provider.change_points: + raise ValueError( + f"ARL benchmark requires empty change_points, " + f"but provider '{provider.name}' has {list(provider.change_points)}." 
+ ) + + metrics = {"arl": ARLMetric[TraceT, ProviderT]()} + + super().__init__( + algorithms=algorithms, + providers=providers, + metrics=metrics, # type: ignore[arg-type] + solver=solver, + dump_dir=dump_dir, + ) + + self._mode = mode + if mode == "reset": + # Delegate to standard ResetBenchmarkRunner + self._inner_runner: OnlineBenchmarkRunner[Any, ProviderT] = ResetBenchmarkRunner( + algorithms=algorithms, + providers=providers, + metrics=cast(Any, metrics), + solver=solver, + dump_dir=dump_dir, + ) + elif mode == "noreset": + # Delegate to optimized NoResetBenchmarkRunner with PointBased policy + self._inner_runner = NoResetBenchmarkRunner( + algorithms=algorithms, + providers=providers, + metrics=cast(Any, metrics), + solver=solver, + policy=PointBasedPolicy(strict=True), + dump_dir=dump_dir, + ) + else: + raise ValueError(f"Invalid mode: {mode}. Must be 'reset' or 'noreset'.") def _collect_runs( self, @@ -28,4 +118,25 @@ def _collect_runs( threshold: float, providers: Sequence[ProviderT], ) -> list[tuple[TraceT, ProviderT]]: - raise NotImplementedError("Method `_collect_runs` is not implemented yet.") + """ + Collect runs for a given algorithm and threshold using the configured mode. + + Delegates the collection to either ResetBenchmarkRunner or + NoResetBenchmarkRunner depending on the initialized mode. + + Parameters + ---------- + algorithm : OnlineAlgorithm[Any, Any, Any] + The algorithm to evaluate. + threshold : float + The detection threshold. + providers : Sequence[ProviderT] + Data providers to run against. + + Returns + ------- + list[tuple[TraceT, ProviderT]] + Batch of (trace, provider) pairs. 
+ """ + runs = self._inner_runner._collect_runs(algorithm, threshold, providers) + return cast(list[tuple[TraceT, ProviderT]], runs) diff --git a/tests/unit/benchmark/test_arl_benchmark_runner.py b/tests/unit/benchmark/test_arl_benchmark_runner.py new file mode 100644 index 0000000..8a21ba8 --- /dev/null +++ b/tests/unit/benchmark/test_arl_benchmark_runner.py @@ -0,0 +1,778 @@ +# -*- coding: ascii -*- +""" +Tests for ARLBenchmarkRunner. + +Covers initialization validation, _collect_runs behavior, run() output +structure and exact ARL values, max_runlength interaction, reset vs +noreset mode semantics, and reset behavior verification. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import math +from typing import Any, Literal + +import pytest + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.arl_benchmark_runner import ARLBenchmarkRunner +from pysatl_cpd.benchmark.metrics.online.arl_metric import ARLMetric +from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmConfiguration +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm + + +def _make_provider( + length: int, + change_points: list[int] | None = None, + name: str = "test_data", +) -> LabeledData[float]: + """Create a LabeledData provider with the given length and change points. + + Parameters + ---------- + length : int + Number of observations in the raw data. + change_points : list[int] | None + Known change point indices. Defaults to empty list. + name : str + Human-readable identifier for the provider. + + Returns + ------- + LabeledData[float] + Provider filled with constant 1.0 observations. 
+ """ + cp: list[int] = change_points if change_points is not None else [] + return LabeledData(raw_data=[1.0] * length, change_points=cp, name=name) + + +# --------------------------------------------------------------------------- +# 1. Initialization and validation +# --------------------------------------------------------------------------- +class TestARLBenchmarkRunnerInit: + """Tests for ARLBenchmarkRunner.__init__ validation logic.""" + + def test_raises_if_provider_has_change_points(self) -> None: + """Should raise ValueError when a single provider has non-empty change_points.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(10, change_points=[5], name="bad") + solver: OnlineCpdSolver = OnlineCpdSolver() + + with pytest.raises(ValueError): + ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + + def test_raises_if_any_provider_has_change_points(self) -> None: + """Should raise ValueError when at least one of several providers has change_points.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + ok_provider: LabeledData[float] = _make_provider(10, name="ok") + bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad") + solver: OnlineCpdSolver = OnlineCpdSolver() + + with pytest.raises(ValueError): + ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[ok_provider, bad_provider], + solver=solver, + mode="reset", + ) + + def test_raises_if_any_provider_has_change_points_noreset_mode(self) -> None: + """Validation should apply in noreset mode as well.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + bad_provider: LabeledData[float] = _make_provider(10, change_points=[3], name="bad") + solver: OnlineCpdSolver = OnlineCpdSolver() + + with 
pytest.raises(ValueError): + ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[bad_provider], + solver=solver, + mode="noreset", + ) + + def test_valid_init_with_empty_change_points(self) -> None: + """Should succeed when all providers have empty change_points.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(10, name="clean") + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + assert runner is not None + + def test_metrics_contain_arl_metric(self) -> None: + """Internal _metrics dict should contain 'arl' key with ARLMetric instance.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(10, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + assert "arl" in runner._metrics + assert isinstance(runner._metrics["arl"], ARLMetric) + + @pytest.mark.parametrize("mode", ["reset", "noreset"]) + def test_accepts_both_modes(self, mode: Literal["reset", "noreset"]) -> None: + """Constructor should accept both 'reset' and 'noreset' mode values.""" + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="d") + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [1.0])], + providers=[provider], + solver=solver, + mode=mode, + ) + assert runner is not None + + +# 
class TestARLBenchmarkRunnerCollectRuns:
    """Checks on the (trace, provider) pairs returned by ``_collect_runs``."""

    def test_returns_correct_number_of_pairs_reset(self) -> None:
        """Reset mode: three providers in, three pairs out."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
        data_sets: list[LabeledData[float]] = [_make_provider(10, name=label) for label in ("d1", "d2", "d3")]

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [1.0])],
            providers=data_sets,
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        collected: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
            algo, 1.0, data_sets
        )
        assert len(collected) == 3

    def test_returns_correct_number_of_pairs_noreset(self) -> None:
        """Noreset mode: two providers in, two pairs out."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
        data_sets: list[LabeledData[float]] = [_make_provider(10, name=label) for label in ("d1", "d2")]

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [1.0])],
            providers=data_sets,
            solver=OnlineCpdSolver(),
            mode="noreset",
        )
        collected: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
            algo, 1.0, data_sets
        )
        assert len(collected) == 2

    def test_pairs_traces_with_correct_providers(self) -> None:
        """Provider names in the returned pairs follow the input order."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])
        first: LabeledData[float] = _make_provider(10, name="alpha")
        second: LabeledData[float] = _make_provider(15, name="beta")
        data_sets: list[LabeledData[float]] = [first, second]

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [1.0])],
            providers=data_sets,
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        collected: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(
            algo, 1.0, data_sets
        )
        paired_names: list[str] = [pair[1].name for pair in collected]
        assert paired_names == ["alpha", "beta"]

    def test_empty_providers_returns_empty_list(self) -> None:
        """Passing an empty provider sequence to ``_collect_runs`` yields an empty list."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="algo", return_sequence=[0.0])

        # The runner itself is built with one provider; only the call below gets none.
        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [1.0])],
            providers=[_make_provider(10)],
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        collected: list[tuple[OnlineDetectionTrace[Any], LabeledData[float]]] = runner._collect_runs(algo, 1.0, [])
        assert collected == []
class TestARLBenchmarkRunnerRun:
    """Checks on the shape of ``run()`` output and the ARL values it reports."""

    def test_run_returns_correct_key_structure(self) -> None:
        """The single result key is the pair (str(algorithm), algorithm.configuration)."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="KeyAlgo", return_sequence=[0.0])
        data: LabeledData[float] = _make_provider(10)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [1.0])],
            providers=[data],
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        outcome: dict[
            tuple[str, OnlineAlgorithmConfiguration],
            list[tuple[float, dict[str, Any]]],
        ] = runner.run()

        assert len(outcome) == 1
        algo_label, algo_config = next(iter(outcome))
        assert algo_label == str(algo)
        assert algo_config == algo.configuration

    def test_run_arl_infinity_when_no_detections(self) -> None:
        """Reset mode: a constant 0.5 output against threshold 3.0 is expected to give inf ARL."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5])
        data: LabeledData[float] = _make_provider(20)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [3.0])],
            providers=[data],
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        outcome = runner.run()
        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
        arl_value: float = outcome[first_key][0][1]["arl"]

        assert math.isinf(arl_value)

    def test_run_arl_infinity_noreset_when_no_detections(self) -> None:
        """Noreset mode: the same quiet scenario is expected to give inf ARL as well."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="QuietAlgo", return_sequence=[0.5])
        data: LabeledData[float] = _make_provider(20)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [3.0])],
            providers=[data],
            solver=OnlineCpdSolver(),
            mode="noreset",
        )
        outcome = runner.run()
        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
        arl_value: float = outcome[first_key][0][1]["arl"]

        assert math.isinf(arl_value)

    def test_run_multiple_thresholds(self) -> None:
        """Three thresholds yield three entries, in input order, each carrying an 'arl' value."""
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Multi", return_sequence=[0.0, 2.0, 5.0])
        data: LabeledData[float] = _make_provider(20)
        cutoffs: list[float] = [1.0, 3.0, 10.0]

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, cutoffs)],
            providers=[data],
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        outcome = runner.run()
        _, per_threshold = next(iter(outcome.items()))

        assert len(per_threshold) == 3
        assert [cutoff for cutoff, _ in per_threshold] == cutoffs
        assert all("arl" in metric_values for _, metric_values in per_threshold)

    def test_run_arl_aggregated_across_providers(self) -> None:
        """ARL is computed over the run lengths of all providers together.

        Scenario encoded by the expected value (algorithm output cycles
        0.0, 5.0; threshold 3.0; reset mode, so the cycle restarts after
        each detection):

        p1 (4 obs): detections at steps 1, 3 -> run lengths [1, 2].
        p2 (6 obs): detections at steps 1, 3, 5 -> run lengths [1, 2, 2].

        Pooled run lengths [1, 2, 1, 2, 2] give ARL = 8 / 5 = 1.6.
        """
        algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Agg", return_sequence=[0.0, 5.0])
        data_sets: list[LabeledData[float]] = [
            _make_provider(size, name=label) for size, label in ((4, "p1"), (6, "p2"))
        ]

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algo, [3.0])],
            providers=data_sets,
            solver=OnlineCpdSolver(),
            mode="reset",
        )
        outcome = runner.run()
        first_key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(outcome))
        arl_value: float = outcome[first_key][0][1]["arl"]

        expected_arl: float = 8.0 / 5.0
        assert abs(arl_value - expected_arl) < 1e-10
+ """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="ModeTest", + return_sequence=[0.0, 5.0, 0.0, 0.0], + ) + provider: LabeledData[float] = _make_provider(20) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner_reset: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + res_reset = runner_reset.run() + key_reset: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_reset)) + arl_reset: float = res_reset[key_reset][0][1]["arl"] + + runner_noreset: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="noreset", + ) + res_noreset = runner_noreset.run() + key_noreset: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_noreset)) + arl_noreset: float = res_noreset[key_noreset][0][1]["arl"] + + assert math.isfinite(arl_reset) + assert math.isfinite(arl_noreset) + assert arl_reset < arl_noreset + + def test_reset_mode_exact_arl_with_immediate_signal(self) -> None: + """Verify exact ARL in reset mode. + + Algorithm return_sequence=[0.0, 5.0, 0.0, 0.0], threshold=3.0. + Reset mode: sequence restarts after every signal. + + 12 observations: + Step 0 -> 0.0 (no). Step 1 -> 5.0 (yes, reset). + Step 2 -> 0.0 (no). Step 3 -> 5.0 (yes, reset). + Step 4 -> 0.0 (no). Step 5 -> 5.0 (yes, reset). + Step 6 -> 0.0 (no). Step 7 -> 5.0 (yes, reset). + Step 8 -> 0.0 (no). Step 9 -> 5.0 (yes, reset). + Step 10 -> 0.0 (no). Step 11 -> 5.0 (yes, reset). + + Detections at steps 1, 3, 5, 7, 9, 11. + Run lengths from 0: [1, 2, 2, 2, 2, 2]. + ARL = 11 / 6. 
+ """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="Immediate", + return_sequence=[0.0, 5.0, 0.0, 0.0], + ) + provider: LabeledData[float] = _make_provider(12) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + _, metrics = results[key][0] + arl_value: float = metrics["arl"] + + expected_arl: float = 11.0 / 6.0 + assert abs(arl_value - expected_arl) < 1e-10 + + def test_noreset_mode_exact_arl_with_periodic_signal(self) -> None: + """Verify exact ARL in noreset mode with periodic signal. + + Algorithm return_sequence=[5.0, 0.0, 0.0, 0.0], threshold=3.0. + NoReset mode: sequence cycles without restart. + 12 observations -> values: 5,0,0,0,5,0,0,0,5,0,0,0. + Detections at 1-based indices: 1, 5, 9 (where value=5.0). + Run lengths from 0: [1, 4, 4]. + ARL = 9 / 3 = 3.0. + """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="Periodic", + return_sequence=[5.0, 0.0, 0.0, 0.0], + ) + provider: LabeledData[float] = _make_provider(12) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="noreset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + _, metrics = results[key][0] + arl_value: float = metrics["arl"] + + expected_arl: float = 9.0 / 3.0 + assert abs(arl_value - expected_arl) < 1e-10 + + def test_noreset_lower_threshold_shorter_arl(self) -> None: + """Lower threshold in noreset mode should detect more, producing shorter ARL. + + Algorithm [0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 24 observations. 
+ threshold=1.5: detections where value > 1.5 -> indices with 2,3,4,5. + threshold=4.5: detections where value > 4.5 -> indices with 5 only. + """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="Gradual", + return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], + ) + provider: LabeledData[float] = _make_provider(24) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [1.5, 4.5])], + providers=[provider], + solver=solver, + mode="noreset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + entries: list[tuple[float, dict[str, Any]]] = results[key] + + arl_low: float = entries[0][1]["arl"] + arl_high: float = entries[1][1]["arl"] + + assert math.isfinite(arl_low) + assert math.isfinite(arl_high) + assert arl_low < arl_high + + def test_noreset_same_arl_for_same_threshold_different_runs(self) -> None: + """In noreset mode, same algorithm+provider+threshold should give same ARL. + + This validates determinism and that the inf-trace is reused correctly. 
class TestARLBenchmarkRunnerMaxRunlength:
    """Tests for ARL interaction with solver max_runlength (forced change points)."""

    def test_forced_detections_produce_finite_arl(self) -> None:
        """With max_runlength=5 the ARL is finite and positive even for an unreachable threshold.

        The mock always outputs 0.0 while the threshold is 100.0, so any
        detection must come from the solver's max_runlength setting.
        """
        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
        provider: LabeledData[float] = _make_provider(18)
        solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=5)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algorithm, [100.0])],
            providers=[provider],
            solver=solver,
            mode="reset",
        )
        results = runner.run()
        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
        _, metrics = results[key][0]
        arl_value: float = metrics["arl"]

        assert math.isfinite(arl_value)
        assert arl_value > 0

    def test_exact_arl_with_max_runlength(self) -> None:
        """Verify exact ARL with max_runlength=5 on 18 observations.

        The expected value 17 / 3 corresponds to forced detections at
        0-based steps 5, 11 and 17, i.e. run lengths [5, 6, 6].
        NOTE(review): this presumes the solver forces a detection once the
        run length exceeds max_runlength; the solver is not visible here.
        """
        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
        provider: LabeledData[float] = _make_provider(18)
        solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=5)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algorithm, [100.0])],
            providers=[provider],
            solver=solver,
            mode="reset",
        )
        results = runner.run()
        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
        _, metrics = results[key][0]
        arl_value: float = metrics["arl"]

        expected_arl: float = 17.0 / 3.0
        # Absolute tolerance only: the expected value is an exact small fraction.
        assert math.isclose(arl_value, expected_arl, rel_tol=0.0, abs_tol=1e-10)

    def test_signal_before_forced_prevents_forced(self) -> None:
        """Signal detections happening before max_runlength prevent forced detections.

        Algorithm [0.0, 0.0, 5.0], threshold=3.0, max_runlength=10, reset mode.
        The signal fires every 3 steps, well before a forced detection
        would be due, and each reset restarts the sequence.

        18 obs -> detections at steps 2, 5, 8, 11, 14, 17.
        Run lengths: [2, 3, 3, 3, 3, 3]. ARL = 17 / 6.
        """
        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Fast", return_sequence=[0.0, 0.0, 5.0])
        provider: LabeledData[float] = _make_provider(18)
        solver: OnlineCpdSolver = OnlineCpdSolver(max_runlength=10)

        runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algorithm, [3.0])],
            providers=[provider],
            solver=solver,
            mode="reset",
        )
        results = runner.run()
        key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results))
        _, metrics = results[key][0]
        arl_value: float = metrics["arl"]

        expected_arl: float = 17.0 / 6.0
        assert math.isclose(arl_value, expected_arl, rel_tol=0.0, abs_tol=1e-10)

    def test_max_runlength_noreset_inf_trace_still_forces(self) -> None:
        """Compare ARL with and without max_runlength for a silent algorithm.

        The mock always outputs 0.0 and the threshold is 100.0, so the
        detection function never crosses the threshold. Two runners are
        built over the same 15-observation provider:

        * solver with max_runlength=4 -> ARL is expected to be finite;
        * solver without max_runlength -> ARL is expected to be inf.

        NOTE(review): despite "noreset" in the test name, both runners are
        constructed with mode="reset". Either rename the test or add a
        real noreset variant once the intended noreset behaviour with
        forced detections is confirmed.
        """
        algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Silent", return_sequence=[0.0])
        provider: LabeledData[float] = _make_provider(15)
        solver_forced: OnlineCpdSolver = OnlineCpdSolver(max_runlength=4)
        solver_no_forced: OnlineCpdSolver = OnlineCpdSolver()

        runner_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algorithm, [100.0])],
            providers=[provider],
            solver=solver_forced,
            mode="reset",
        )
        runner_no_forced: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner(
            algorithms=[(algorithm, [100.0])],
            providers=[provider],
            solver=solver_no_forced,
            mode="reset",
        )

        res_forced = runner_forced.run()
        res_no_forced = runner_no_forced.run()

        key_f: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_forced))
        key_nf: tuple[str, OnlineAlgorithmConfiguration] = next(iter(res_no_forced))

        arl_forced: float = res_forced[key_f][0][1]["arl"]
        arl_no_forced: float = res_no_forced[key_nf][0][1]["arl"]

        assert math.isfinite(arl_forced)
        assert math.isinf(arl_no_forced)
+ """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="Reset", return_sequence=[0.0, 5.0]) + provider: LabeledData[float] = _make_provider(8) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + _, metrics = results[key][0] + arl_value: float = metrics["arl"] + + expected_arl: float = 7.0 / 4.0 + assert abs(arl_value - expected_arl) < 1e-10 + + def test_reset_restarts_learning_period(self) -> None: + """Reset re-enters learning period, creating longer gaps between detections. + + Algorithm return_sequence=[5.0], learning_period_size=2, threshold=3.0. + With reset: after each detection, algorithm resets and needs 2 + observations for learning (returning 0.0), then next returns 5.0. + + 9 obs: + Step 0: learning (0.0). Step 1: learning (0.0). + Step 2: 5.0 (yes, reset). + Step 3: learning (0.0). Step 4: learning (0.0). + Step 5: 5.0 (yes, reset). + Step 6: learning (0.0). Step 7: learning (0.0). + Step 8: 5.0 (yes, reset). + + Detections at steps 2, 5, 8. + Run lengths: [2, 3, 3]. ARL = 8 / 3. 
+ """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="Learn", + return_sequence=[5.0], + learning_period_size=2, + ) + provider: LabeledData[float] = _make_provider(9) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [3.0])], + providers=[provider], + solver=solver, + mode="reset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + _, metrics = results[key][0] + arl_value: float = metrics["arl"] + + expected_arl: float = 8.0 / 3.0 + assert abs(arl_value - expected_arl) < 1e-10 + + def test_lower_threshold_produces_shorter_arl_reset(self) -> None: + """Lower threshold detects more often, resulting in shorter ARL in reset mode. + + Algorithm [0.0, 1.0, 2.0, 3.0, 4.0, 5.0], 30 observations. + threshold=1.5: signal when value > 1.5, detections sooner after reset. + threshold=4.5: signal when value > 4.5, detections later after reset. 
+ """ + algorithm: MockOnlineAlgorithm[float] = MockOnlineAlgorithm( + name="Gradual", + return_sequence=[0.0, 1.0, 2.0, 3.0, 4.0, 5.0], + ) + provider: LabeledData[float] = _make_provider(30) + solver: OnlineCpdSolver = OnlineCpdSolver() + + runner: ARLBenchmarkRunner[OnlineDetectionTrace[Any], LabeledData[float]] = ARLBenchmarkRunner( + algorithms=[(algorithm, [1.5, 4.5])], + providers=[provider], + solver=solver, + mode="reset", + ) + results = runner.run() + key: tuple[str, OnlineAlgorithmConfiguration] = next(iter(results)) + entries: list[tuple[float, dict[str, Any]]] = results[key] + + arl_low: float = entries[0][1]["arl"] + arl_high: float = entries[1][1]["arl"] + + assert math.isfinite(arl_low) + assert math.isfinite(arl_high) + assert arl_low < arl_high From 09183f7a1020c5ffbaa1db4201ef5304c9719903 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 04:41:54 +0300 Subject: [PATCH 14/15] feat: add BenchmarkAnalyzer --- .../benchmark/core/benchmark_analyzer.py | 46 +++++- .../benchmark/core/test_benchmark_analyzer.py | 140 ++++++++++++++++++ 2 files changed, 182 insertions(+), 4 deletions(-) create mode 100644 tests/unit/benchmark/core/test_benchmark_analyzer.py diff --git a/pysatl_cpd/benchmark/core/benchmark_analyzer.py b/pysatl_cpd/benchmark/core/benchmark_analyzer.py index fb7a511..4fd5ab3 100644 --- a/pysatl_cpd/benchmark/core/benchmark_analyzer.py +++ b/pysatl_cpd/benchmark/core/benchmark_analyzer.py @@ -1,20 +1,58 @@ +# -*- coding: ascii -*- + +""" +Benchmark analyzer module. + +This module provides a convenient wrapper to apply multiple aggregate metrics +to a single batch of benchmark execution results. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + from typing import Any from pysatl_cpd.analysis.labeled_data import LabeledData from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric -from pysatl_cpd.core.online.ionline_algorithm import OnlineAlgorithmState from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace -class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[OnlineAlgorithmState], ProviderT: LabeledData[Any]]: +class BenchmarkAnalyzer[TraceT: OnlineDetectionTrace[Any], ProviderT: LabeledData[Any]]: + """ + Evaluator for applying multiple metrics to a batch of benchmark runs. + + This class encapsulates a dictionary of initialized metrics and provides + a single entry point to evaluate all of them on the given execution results. + + Parameters + ---------- + metrics : dict[str, MultipleRunMetric[TraceT, ProviderT, Any]] + A mapping of metric names to metric instances. + """ + def __init__( self, metrics: dict[str, MultipleRunMetric[TraceT, ProviderT, Any]], ) -> None: - return + self._metrics = metrics def analyze( self, runs: list[tuple[TraceT, ProviderT]], ) -> dict[str, Any]: - raise NotImplementedError("Method `analyze` is not implemented yet.") + """ + Evaluate all registered metrics on the provided batch of runs. + + Parameters + ---------- + runs : list[tuple[TraceT, ProviderT]] + A batch of execution results, where each element is a pair of + (detection_trace, data_provider). + + Returns + ------- + dict[str, Any] + A mapping of metric names to their evaluated results. 
+ """ + return {metric_name: metric.evaluate(runs) for metric_name, metric in self._metrics.items()} diff --git a/tests/unit/benchmark/core/test_benchmark_analyzer.py b/tests/unit/benchmark/core/test_benchmark_analyzer.py new file mode 100644 index 0000000..175fc81 --- /dev/null +++ b/tests/unit/benchmark/core/test_benchmark_analyzer.py @@ -0,0 +1,140 @@ +# -*- coding: ascii -*- + +""" +Tests for BenchmarkAnalyzer. + +Covers metric storage, evaluation routing, and edge cases with empty inputs. +""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +from typing import Any + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.core.benchmark_analyzer import BenchmarkAnalyzer +from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from tests.mocks.analysis.labeled_data import MockLabeledData +from tests.mocks.analysis.metrics.mock_run_metric import MockRunMetric +from tests.mocks.benchmark.metrics.mock_aggregation_metric import MockAggregationMetric +from tests.mocks.core.online.online_detection_trace import MockOnlineDetectionTrace + + +class TestBenchmarkAnalyzerInit: + """Tests for BenchmarkAnalyzer.__init__.""" + + def test_init_stores_metrics(self) -> None: + """Analyzer should store the provided metrics dictionary.""" + base: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[1.0]) + metric: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base) + metrics: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = { + "m1": metric, + } + + analyzer: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=metrics) + assert analyzer._metrics is metrics + + +class TestBenchmarkAnalyzerAnalyze: + """Tests 
for BenchmarkAnalyzer.analyze.""" + + def test_analyze_evaluates_all_metrics(self) -> None: + """Analyzer should call evaluate() on every metric and return all results.""" + base1: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[2.0, 3.0]) + base2: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[10.0, 20.0]) + m1: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base1) + m2: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base2) + metrics: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = { + "sum_small": m1, + "sum_big": m2, + } + + analyzer: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=metrics) + + trace1: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[]) + trace2: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[]) + data1: MockLabeledData = MockLabeledData(change_points=[], name="d1") + data2: MockLabeledData = MockLabeledData(change_points=[], name="d2") + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [ + (trace1, data1), + (trace2, data2), + ] + + results: dict[str, Any] = analyzer.analyze(runs) + + assert "sum_small" in results + assert "sum_big" in results + assert results["sum_small"] == 2.0 + 3.0 + assert results["sum_big"] == 10.0 + 20.0 + + def test_analyze_passes_runs_to_base_metric(self) -> None: + """Base metric inside aggregation should receive the exact runs.""" + base: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[1.0]) + metric: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base) + metrics: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = { + "m": metric, + } + + analyzer: 
BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=metrics) + + trace: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[]) + data: MockLabeledData = MockLabeledData(change_points=[], name="d") + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [(trace, data)] + + analyzer.analyze(runs) + + assert len(base.calls) == 1 + assert base.calls[0][0] is trace + assert base.calls[0][1] is data + + def test_analyze_with_empty_metrics(self) -> None: + """Analyzer should return empty dict when no metrics are registered.""" + analyzer: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics={}) + + trace: MockOnlineDetectionTrace = MockOnlineDetectionTrace(detected_change_points=[]) + data: MockLabeledData = MockLabeledData(change_points=[], name="d") + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [(trace, data)] + + results: dict[str, Any] = analyzer.analyze(runs) + assert results == {} + + def test_analyze_with_empty_runs(self) -> None: + """Analyzer should pass empty list to metrics and return their results.""" + base: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockRunMetric(return_values=[99.0]) + metric: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base) + metrics: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = { + "m": metric, + } + + analyzer: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=metrics) + + results: dict[str, Any] = analyzer.analyze([]) + + assert results == {"m": 0.0} + assert len(metric.aggregate_calls) == 1 + assert metric.aggregate_calls[0] == [] + + def test_analyze_with_multiple_runs_aggregates_correctly(self) -> None: + """Aggregation metric should receive all per-run results and sum them.""" + base: MockRunMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = 
MockRunMetric(return_values=[1.0, 2.0, 3.0]) + metric: MockAggregationMetric[OnlineDetectionTrace[Any], LabeledData[Any]] = MockAggregationMetric(base=base) + metrics: dict[str, MultipleRunMetric[OnlineDetectionTrace[Any], LabeledData[Any], Any]] = { + "total": metric, + } + + analyzer: BenchmarkAnalyzer[OnlineDetectionTrace[Any], LabeledData[Any]] = BenchmarkAnalyzer(metrics=metrics) + + runs: list[tuple[OnlineDetectionTrace[Any], LabeledData[Any]]] = [ + (MockOnlineDetectionTrace([]), MockLabeledData([], name="a")), + (MockOnlineDetectionTrace([]), MockLabeledData([], name="b")), + (MockOnlineDetectionTrace([]), MockLabeledData([], name="c")), + ] + + results: dict[str, Any] = analyzer.analyze(runs) + + assert results == {"total": 6.0} + assert len(metric.aggregate_calls) == 1 + assert metric.aggregate_calls[0] == [1.0, 2.0, 3.0] + assert len(base.calls) == 3 From a971d7f75eb8c9001f8c5c5300220c095ef3a037 Mon Sep 17 00:00:00 2001 From: iraedeus Date: Tue, 14 Apr 2026 04:52:19 +0300 Subject: [PATCH 15/15] test: BenchmarkExecutor --- .../benchmark/core/test_benchmark_executor.py | 515 ++++++++++++++++++ 1 file changed, 515 insertions(+) create mode 100644 tests/unit/benchmark/core/test_benchmark_executor.py diff --git a/tests/unit/benchmark/core/test_benchmark_executor.py b/tests/unit/benchmark/core/test_benchmark_executor.py new file mode 100644 index 0000000..9261b1e --- /dev/null +++ b/tests/unit/benchmark/core/test_benchmark_executor.py @@ -0,0 +1,515 @@ +# -*- coding: ascii -*- +""" +Tests for BenchmarkExecutor and BenchmarkRecord. + +Covers result count for various combinations, trace content verification, +record metadata, and disk caching behavior. 
+""" + +__author__ = "Danil Totmyanin" +__copyright__ = "Copyright (c) 2026 PySATL project" +__license__ = "SPDX-License-Identifier: MIT" + +import csv +from pathlib import Path +from typing import Any + +import numpy as np + +from pysatl_cpd.analysis.labeled_data import LabeledData +from pysatl_cpd.benchmark.core.benchmark_executor import ( + BenchmarkExecutor, + BenchmarkRecord, +) +from pysatl_cpd.core.online.online_cpd_solver import OnlineCpdSolver +from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace +from tests.mocks.algorithms.online.simple import MockOnlineAlgorithm + + +def _make_provider( + length: int, + name: str = "test_data", +) -> LabeledData[float]: + """Create a LabeledData provider with constant observations. + + Parameters + ---------- + length : int + Number of observations. + name : str + Provider identifier. + + Returns + ------- + LabeledData[float] + Provider with ``length`` observations of 1.0 and no change points. + """ + return LabeledData(raw_data=[1.0] * length, change_points=[], name=name) + + +# --------------------------------------------------------------------------- +# 1. 
BenchmarkRecord +# --------------------------------------------------------------------------- +class TestBenchmarkRecord: + """Tests for BenchmarkRecord dataclass.""" + + def test_key_returns_correct_tuple(self) -> None: + """Key property should return (algorithm, config_hash, data, threshold).""" + record: BenchmarkRecord = BenchmarkRecord( + algorithm="TestAlgo", + configuration_hash=42, + data="dataset", + threshold=2.5, + trace_path="/tmp/trace.pkl", + ) + expected: tuple[str, int, str, float] = ("TestAlgo", 42, "dataset", 2.5) + assert record.key == expected + + def test_default_trace_path_is_none(self) -> None: + """trace_path should default to None when not provided.""" + record: BenchmarkRecord = BenchmarkRecord( + algorithm="A", + configuration_hash=0, + data="d", + threshold=1.0, + ) + assert record.trace_path is None + + +# --------------------------------------------------------------------------- +# 2. Basic execution - result counts +# --------------------------------------------------------------------------- +class TestBenchmarkExecutorBasic: + """Tests for correct number of results across combinations.""" + + def test_single_combination(self) -> None: + """1 algorithm x 1 threshold x 1 provider -> 1 result.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + ) + results: list[tuple[BenchmarkRecord, OnlineDetectionTrace[Any]]] = executor.execute() + assert len(results) == 1 + + def test_multiple_thresholds(self) -> None: + """1 algorithm x 3 thresholds x 1 provider -> 3 results.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: 
BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0, 2.0, 3.0])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + assert len(results) == 3 + + def test_multiple_providers(self) -> None: + """1 algorithm x 1 threshold x 3 providers -> 3 results.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + providers: list[LabeledData[float]] = [ + _make_provider(5, name="p1"), + _make_provider(5, name="p2"), + _make_provider(5, name="p3"), + ] + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=providers, + solver=solver, + ) + results = executor.execute() + assert len(results) == 3 + + def test_multiple_algorithms(self) -> None: + """2 algorithms x 1 threshold each x 1 provider -> 2 results.""" + algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0]) + algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", return_sequence=[1.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo1, [1.0]), (algo2, [2.0])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + assert len(results) == 2 + + def test_cartesian_product(self) -> None: + """2 algorithms x 2 thresholds x 2 providers -> 8 results.""" + algo1: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A1", return_sequence=[0.0]) + algo2: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A2", return_sequence=[0.0]) + providers: list[LabeledData[float]] = [ + _make_provider(5, name="p1"), + _make_provider(5, name="p2"), + ] + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo1, [1.0, 2.0]), (algo2, [3.0, 4.0])], + providers=providers, + solver=solver, + ) + results = 
executor.execute() + assert len(results) == 8 + + def test_empty_algorithms(self) -> None: + """No algorithms -> empty results.""" + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[], + providers=[provider], + solver=solver, + ) + results = executor.execute() + assert results == [] + + def test_empty_providers(self) -> None: + """No providers -> empty results.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[], + solver=solver, + ) + results = executor.execute() + assert results == [] + + def test_empty_thresholds(self) -> None: + """Algorithm with no thresholds -> no results for that algorithm.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + assert results == [] + + +# --------------------------------------------------------------------------- +# 3. Trace content +# --------------------------------------------------------------------------- +class TestBenchmarkExecutorTraceContent: + """Tests for detection trace correctness.""" + + def test_detections_at_correct_steps(self) -> None: + """Verify detected change points match expected steps. + + Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations. + Step 0: 0.0 (no), Step 1: 0.0 (no), Step 2: 5.0 (yes, reset). + Step 3: 0.0 (no), Step 4: 0.0 (no), Step 5: 5.0 (yes, reset). + Detections at steps 2 and 5 (0-based). 
+ """ + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0]) + provider: LabeledData[float] = _make_provider(6) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [3.0])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + trace: OnlineDetectionTrace[Any] = results[0][1] + + assert list(trace.detected_change_points) == [2, 5] + + def test_no_detections_with_high_threshold(self) -> None: + """No detections when threshold is unreachable.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[5.0]) + provider: LabeledData[float] = _make_provider(10) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [100.0])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + trace: OnlineDetectionTrace[Any] = results[0][1] + + assert list(trace.detected_change_points) == [] + + def test_trace_algorithm_name(self) -> None: + """Trace should carry the str(algorithm) as algorithm_name.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="NamedAlgo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + trace: OnlineDetectionTrace[Any] = results[0][1] + + assert trace.algorithm_name == str(algo) + + def test_detection_function_values(self) -> None: + """Detection function array should contain correct statistic values. + + Algorithm [1.0, 2.0, 3.0], threshold=inf (no detections/resets). + 6 observations -> values cycle: [1, 2, 3, 1, 2, 3]. 
+ """ + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[1.0, 2.0, 3.0]) + provider: LabeledData[float] = _make_provider(6) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [float("inf")])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + trace: OnlineDetectionTrace[Any] = results[0][1] + + expected: list[float] = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + np.testing.assert_array_almost_equal(trace.detection_function, expected) + + +# --------------------------------------------------------------------------- +# 4. Record content +# --------------------------------------------------------------------------- +class TestBenchmarkExecutorRecordContent: + """Tests for BenchmarkRecord fields in executor output.""" + + def test_record_fields_match_input(self) -> None: + """Record fields should match the algorithm, provider, and threshold.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="RecAlgo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="my_data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [7.5])], + providers=[provider], + solver=solver, + ) + results = executor.execute() + record: BenchmarkRecord = results[0][0] + + assert record.algorithm == str(algo) + assert record.configuration_hash == hash(algo.configuration) + assert record.data == "my_data" + assert record.threshold == 7.5 + + def test_record_trace_path_none_without_dump_dir(self) -> None: + """trace_path should be None when dump_dir is not set.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5) + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + 
solver=solver, + dump_dir=None, + ) + results = executor.execute() + record: BenchmarkRecord = results[0][0] + + assert record.trace_path is None + + def test_record_trace_path_set_with_dump_dir(self, tmp_path: Path) -> None: + """trace_path should point to an existing pickle file when dump_dir is set.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + results = executor.execute() + record: BenchmarkRecord = results[0][0] + + assert record.trace_path is not None + assert Path(record.trace_path).exists() + assert record.trace_path.endswith(".pkl") + + +# --------------------------------------------------------------------------- +# 5. Caching +# --------------------------------------------------------------------------- +class TestBenchmarkExecutorCaching: + """Tests for disk caching via CSV registry and pickle files.""" + + def test_creates_registry_and_pickle_files(self, tmp_path: Path) -> None: + """Execute with dump_dir should create registry CSV and pickle file(s).""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor.execute() + + registry_path: Path = tmp_path / "benchmark_registry.csv" + assert registry_path.exists() + + pkl_files: list[Path] = list(tmp_path.glob("*.pkl")) + assert len(pkl_files) == 1 + + def test_cache_prevents_reprocessing(self, tmp_path: Path) -> None: + """Second execute should load from cache without calling solver. 
+ + MockOnlineAlgorithm._call_history accumulates across reset() calls + and is never cleared. If caching works, second execute adds no + new entries. + """ + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + # First run - solver executes, algorithm processes observations + executor1: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor1.execute() + history_after_first: int = len(algo.get_call_history()) + assert history_after_first == 5 + + # Second run - should load from cache + executor2: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor2.execute() + history_after_second: int = len(algo.get_call_history()) + + assert history_after_second == history_after_first + + def test_cached_trace_matches_original(self, tmp_path: Path) -> None: + """Trace loaded from cache should have identical detected_change_points. + + Algorithm [0.0, 0.0, 5.0], threshold=3.0, 6 observations. + Detections at steps 2 and 5. 
+ """ + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0, 0.0, 5.0]) + provider: LabeledData[float] = _make_provider(6, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor1: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [3.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + results1 = executor1.execute() + + executor2: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [3.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + results2 = executor2.execute() + + trace1: OnlineDetectionTrace[Any] = results1[0][1] + trace2: OnlineDetectionTrace[Any] = results2[0][1] + + assert list(trace1.detected_change_points) == list(trace2.detected_change_points) + assert trace1.algorithm_name == trace2.algorithm_name + np.testing.assert_array_almost_equal(trace1.detection_function, trace2.detection_function) + + def test_registry_csv_has_correct_structure(self, tmp_path: Path) -> None: + """Registry CSV should have expected columns and matching row data.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="CsvAlgo", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="csv_data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [2.5])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor.execute() + + registry_path: Path = tmp_path / "benchmark_registry.csv" + with open(registry_path, encoding="utf-8") as f: + reader = csv.DictReader(f) + rows: list[dict[str, str]] = list(reader) + + assert len(rows) == 1 + row: dict[str, str] = rows[0] + + expected_columns: set[str] = { + "algorithm", + "configuration_hash", + "data", + "threshold", + "trace_path", + } + assert set(row.keys()) == expected_columns + assert row["algorithm"] == str(algo) + assert row["data"] == "csv_data" + assert float(row["threshold"]) == 2.5 
+ assert row["trace_path"] != "" + + def test_inf_threshold_in_pickle_filename(self, tmp_path: Path) -> None: + """Pickle filename for infinite threshold should contain 'inf'.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [float("inf")])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor.execute() + + pkl_files: list[Path] = list(tmp_path.glob("*.pkl")) + assert len(pkl_files) == 1 + assert "inf" in pkl_files[0].name + + def test_multiple_thresholds_create_separate_pickle_files(self, tmp_path: Path) -> None: + """Each threshold should produce its own pickle file.""" + algo: MockOnlineAlgorithm[float] = MockOnlineAlgorithm(name="A", return_sequence=[0.0]) + provider: LabeledData[float] = _make_provider(5, name="data") + solver: OnlineCpdSolver = OnlineCpdSolver() + + executor: BenchmarkExecutor[float] = BenchmarkExecutor( + algorithms=[(algo, [1.0, 2.0, 3.0])], + providers=[provider], + solver=solver, + dump_dir=tmp_path, + ) + executor.execute() + + pkl_files: list[Path] = list(tmp_path.glob("*.pkl")) + assert len(pkl_files) == 3 + + registry_path: Path = tmp_path / "benchmark_registry.csv" + with open(registry_path, encoding="utf-8") as f: + rows: list[dict[str, str]] = list(csv.DictReader(f)) + assert len(rows) == 3