feat: add SegmentAggregationMetric

iraedeus · iraedeus · commit b8aea0fe8645 · 2026-04-15T01:48:27.000+03:00
diff --git a/pysatl_cpd/benchmark/metrics/segment_aggregation_metric.py b/pysatl_cpd/benchmark/metrics/segment_aggregation_metric.py
@@ -0,0 +1,89 @@
+# -*- coding: ascii -*-
+"""
+Module for computing aggregated metrics over specific dataset transitions (bisegments).
+"""
+
+__author__ = "Your Name"
+__copyright__ = "Copyright (c) 2026 PySATL project"
+__license__ = "SPDX-License-Identifier: MIT"
+
+from collections.abc import Sequence
+from typing import Any, cast
+
+from pysatl_cpd.benchmark.metrics.aggregation_metric import AggregationMetric
+from pysatl_cpd.benchmark.metrics.multiple_run_metric import MultipleRunMetric
+from pysatl_cpd.core.data_providers.dataset import PandasLabeledDataProvider, SegmentFilter
+from pysatl_cpd.core.online.online_detection_trace import OnlineDetectionTrace
+
+
+class SegmentAggregationMetric[TraceT: OnlineDetectionTrace[Any], ResultInT, ResultOutT](
+    MultipleRunMetric[TraceT, PandasLabeledDataProvider, dict[str, ResultOutT]]
+):
+    """
+    Evaluates an aggregation metric exclusively on specific transition types (bisegments).
+
+    This metric slices both the input data providers and their corresponding
+    detection traces based on user-provided transition filters. It then groups
+    these slices by transition type and computes the underlying base metric for
+    each group independently.
+
+    Parameters
+    ----------
+    base_agg_metric : AggregationMetric[TraceT, PandasLabeledDataProvider, ResultInT, ResultOutT]
+        The underlying metric to compute (e.g., F1Metric, MeanDelayMetric) for each group.
+    transition_filters : dict[str, SegmentFilter]
+        A mapping where keys are human-readable transition names (e.g., 'A -> B')
+        and values are callable predicates that filter bisegments.
+    """
+
+    def __init__(
+        self,
+        base_agg_metric: AggregationMetric[TraceT, PandasLabeledDataProvider, ResultInT, ResultOutT],
+        transition_filters: dict[str, SegmentFilter],
+    ) -> None:
+        self._base_agg_metric = base_agg_metric
+        self._transition_filters = transition_filters
+
+    @property
+    def base_agg_metric(self) -> AggregationMetric[TraceT, PandasLabeledDataProvider, ResultInT, ResultOutT]:
+        """
+        Returns the underlying aggregation metric instance.
+        """
+
+        return self._base_agg_metric
+
+    def evaluate(self, runs: Sequence[tuple[TraceT, PandasLabeledDataProvider]]) -> dict[str, ResultOutT]:
+        """
+        Evaluate the metric grouped by segment transitions.
+
+        Parameters
+        ----------
+        runs : Sequence[tuple[TraceT, PandasLabeledDataProvider]]
+            The full benchmark execution results.
+
+        Returns
+        -------
+        dict[str, Rout]
+            A dictionary mapping the transition name to the computed metric result.
+            If a transition filter matches no segments, it is omitted from the output.
+        """
+
+        grouped_runs: dict[str, list[tuple[TraceT, PandasLabeledDataProvider]]] = {
+            name: [] for name in self._transition_filters
+        }
+
+        for trace, provider in runs:
+            for trans_name, filter_fn in self._transition_filters.items():
+                sub_providers = provider.query_bisegments(filter_fn)
+                sub_indices = provider.query_bisegments_indexes(filter_fn)
+
+                for sub_prov, (g_start, _, g_end) in zip(sub_providers, sub_indices, strict=False):
+                    sub_trace = cast(TraceT, trace.slice(g_start, g_end))
+                    grouped_runs[trans_name].append((sub_trace, sub_prov))
+
+        results: dict[str, ResultOutT] = {}
+        for trans_name, sub_runs in grouped_runs.items():
+            if sub_runs:
+                results[trans_name] = self._base_agg_metric.evaluate(sub_runs)
+
+        return results
diff --git a/pysatl_cpd/core/online/online_detection_trace.py b/pysatl_cpd/core/online/online_detection_trace.py
@@ -155,6 +155,41 @@ class OnlineDetectionTrace[StateT: OnlineAlgorithmState](DetectionTrace):
     learning_periods: list[tuple[int, int]] = field(default_factory=list)
     algorithm_states: list[StateT | None]
 
+    def slice(self, start: int, end: int) -> "OnlineDetectionTrace[StateT]":
+        """
+        Create a new trace representing a slice of the current trace [start, end] (inclusive).
+        Automatically recalculates all relative indices (change points, periods).
+        """
+        new_df = self.detection_function[start : end + 1].copy()
+        new_pt = self.processing_time[start : end + 1].copy()
+
+        new_states = self.algorithm_states[start : end + 1] if self.algorithm_states else []
+
+        def shift_points(pts: Sequence[int]) -> list[int]:
+            return [p - start for p in pts if start <= p <= end]
+
+        def shift_periods(periods: list[tuple[int, int]]) -> list[tuple[int, int]]:
+            res = []
+            for p_start, p_end in periods:
+                if p_end < start or p_start > end:
+                    continue
+                res.append((max(0, p_start - start), min(end - start, p_end - start)))
+            return res
+
+        return type(self)(
+            algorithm_name=self.algorithm_name,
+            configuration_hash=self.configuration_hash,
+            threshold=self.threshold,
+            detected_change_points=shift_points(self.detected_change_points),
+            forced_change_points=shift_points(self.forced_change_points),
+            signal_change_points=shift_points(self.signal_change_points),
+            detection_function=new_df,
+            processing_time=new_pt,
+            algorithm_states=new_states,
+            skip_periods=shift_periods(self.skip_periods),
+            learning_periods=shift_periods(self.learning_periods),
+        )
+
     @classmethod
     def from_run(
         cls,