feat(backtest): BacktestIndexRow DTO + Backtest.index_row() (epic #540 phase 1)

MDUYN · MDUYN · commit 0a6ff46fb3f1 · 2026-05-10T21:50:36.000+02:00
Lift the existing untyped flat-row index helper into a public, typed
Tier-1 contract:

* New BacktestIndexRow dataclass (domain/backtesting/backtest_index_row.py)
  with identity / provenance / config / nested summary_metrics +
  forward-compat extras. Lossless to_flat_dict / from_flat_dict round
  trip for Parquet, SQL and JSON sinks.
* New Backtest.index_row(bundle_path=None) method. Builds without
  decoding any v2 Parquet metric blobs, so it works against bundles
  loaded with Backtest.open(..., summary_only=True). This is the
  fast read path the upcoming 'iaf index' CLI (phase 2) and any
  tiered store implementation (phase 3) will rely on.
* _backtest_to_index_row in backtest_utils now delegates to
  BacktestIndexRow.to_flat_dict() so the wire shape and the in-memory
  shape are a single source of truth (no behavioural change for the
  existing index.parquet sidecar).
* Re-export BacktestIndexRow from the domain and top-level packages.
* docs/design/tiered-backtest-storage.md §3.1 + roadmap row updated
  to reference the typed contract.

Tests: 5 new (in-memory derivation, flat round-trip incl. NaN, unknown
columns landing in extras, derivation from a summary_only=True bundle
load). Full backtests suite green (29/29).
diff --git a/docs/design/tiered-backtest-storage.md b/docs/design/tiered-backtest-storage.md
@@ -69,11 +69,20 @@ Cross-bundle redundancy is the dominant unexploited source of size:
 | Identity | `run_id` (uuid7), `bundle_id`, `parent_sweep_id`, `tenant_id`, `project_id` |
 | Provenance | `algorithm_id`, `code_hash`, `framework_version`, `created_at` |
 | Config | `engine_type`, `params_hash`, `symbols_hash`, `date_range_name`, `start_date`, `end_date`, `tag` |
-| Scalar metrics | `BacktestSummary` fields — Sharpe, Sortino, max_dd, CAGR, total_net_gain, win_rate, … |
+| Scalar metrics | `BacktestSummaryMetrics` fields (nested in `BacktestIndexRow.summary_metrics`) — Sharpe, Sortino, max_dd, CAGR, total_net_gain, win_rate, … |
 | Refs | `snapshots_dataset_uri`, `trades_dataset_uri`, `metric_series_dataset_uri`, `ohlcv_chunk_hashes[]`, `code_chunk_hash`, `symbols_chunk_hash`, `params_chunk_hash` |
 
 Row size: ~1–2 KB. 12,500 rows ≈ 25 MB. Fits comfortably in SQLite for local users.
 
+> **Status (epic #540 phase 1, v8.10):** the typed contract for this row
+> ships as `investing_algorithm_framework.BacktestIndexRow`, derived
+> via `Backtest.index_row(bundle_path=...)`. The method works against
+> bundles loaded with `Backtest.open(path, summary_only=True)` — no
+> Parquet metric blobs are decoded on the fast index path. The
+> existing `BacktestIndex` Parquet sidecar is now built on top of this
+> typed row (`BacktestIndexRow.to_flat_dict()`), making the wire
+> shape and the in-memory shape a single source of truth.
+
 ### 3.2 Tier 2 schemas (Parquet, long format)
 
 `portfolio_snapshots/`:
@@ -215,7 +224,7 @@ Zero behavioural difference vs today. Single-file `.iafbt` users get `export()`
 | Phase | Change | Risk |
 |---|---|---|
 | **v8.9 (shipped)** | Bundle format v2; engine_type split; zstd 19; summary_only read | — |
-| **v8.10** | `Backtest.scalar_summary()` (no decode of bulk); `iaf index <dir>` builds a SQLite index over a folder of bundles; `BacktestSummary` DTO with stable schema | Low — additive read paths |
+| **v8.10** | `Backtest.index_row()` (no decode of bulk); `iaf index <dir>` builds a SQLite index over a folder of bundles; `BacktestIndexRow` DTO with stable schema | Low — additive read paths |
 | **v8.11** | `BacktestStore` interface with `LocalDirStore` (today) and `LocalTieredStore`. `.iafbt` becomes export format; service constructors accept a store | Medium — touches every backtest service constructor; deprecation flag for one minor cycle |
 | **Finterion (closed)** | `RemoteTieredStore` over Postgres + S3 + chunk service | Closed-source, unblocked by v8.11 |
 
diff --git a/investing_algorithm_framework/__init__.py b/investing_algorithm_framework/__init__.py
@@ -19,6 +19,7 @@
     Trade, APP_MODE, AppMode, DATETIME_FORMAT, load_backtests_from_directory, \
     iter_backtests_from_directory, \
     BacktestDateRange, convert_polars_to_pandas, BacktestRun, \
+    BacktestIndexRow, \
     DEFAULT_LOGGING_CONFIG, DataType, DataProvider, StopLossRule, \
     ScalingRule, TradingCost, \
     TradeStatus, generate_backtest_summary_metrics, generate_algorithm_id, \
@@ -222,6 +223,7 @@
     "get_positive_trades",
     "get_number_of_trades",
     "BacktestRun",
+    "BacktestIndexRow",
     "load_backtests_from_directory",
     "iter_backtests_from_directory",
     "save_backtests_to_directory",
diff --git a/investing_algorithm_framework/domain/__init__.py b/investing_algorithm_framework/domain/__init__.py
@@ -43,6 +43,7 @@
     csv_to_list, StoppableThread, load_csv_into_dict, tqdm, \
     is_timezone_aware, sync_timezones, get_timezone
 from .backtesting import BacktestRun, BacktestSummaryMetrics, \
+    BacktestIndexRow, \
     BacktestDateRange, Backtest, BacktestMetrics, combine_backtests, \
     BacktestPermutationTest, BacktestEvaluationFocus, \
     generate_backtest_summary_metrics, load_backtests_from_directory, \
diff --git a/investing_algorithm_framework/domain/backtesting/__init__.py b/investing_algorithm_framework/domain/backtesting/__init__.py
@@ -1,4 +1,5 @@
 from .backtest_summary_metrics import BacktestSummaryMetrics
+from .backtest_index_row import BacktestIndexRow
 from .backtest_date_range import BacktestDateRange
 from .backtest_metrics import BacktestMetrics
 from .backtest_run import BacktestRun
@@ -25,6 +26,7 @@
 __all__ = [
     "Backtest",
     "BacktestSummaryMetrics",
+    "BacktestIndexRow",
     "BacktestDateRange",
     "BacktestMetrics",
     "BacktestRun",
diff --git a/investing_algorithm_framework/domain/backtesting/backtest.py b/investing_algorithm_framework/domain/backtesting/backtest.py
@@ -13,6 +13,7 @@
 from .backtest_permutation_test import BacktestPermutationTest
 from .backtest_date_range import BacktestDateRange
 from .backtest_summary_metrics import BacktestSummaryMetrics
+from .backtest_index_row import BacktestIndexRow
 from .combine_backtests import generate_backtest_summary_metrics
 
 
@@ -215,6 +216,42 @@ def get_backtest_metrics(
             return run.backtest_metrics
         return None
 
+    def index_row(
+        self, bundle_path: Union[str, None] = None,
+    ) -> BacktestIndexRow:
+        """Return the typed Tier-1 row contract for this backtest.
+
+        The row carries identity, provenance, config and the scalar
+        :class:`BacktestSummaryMetrics`, but **no heavy time-series
+        data**. It can therefore be built without decoding any v2
+        Parquet metric blobs (``Backtest.open(path,
+        summary_only=True)`` is the canonical fast read path).
+
+        Args:
+            bundle_path: Optional location the bundle was loaded from
+                (relative or absolute). Stored verbatim in
+                :attr:`BacktestIndexRow.bundle_path` for downstream
+                indexers that need to round-trip back to the file.
+
+        Returns:
+            BacktestIndexRow: typed row (``framework_version`` not set here).
+
+        See also:
+            ``docs/design/tiered-backtest-storage.md`` §3.1 — the
+            authoritative schema this row implements.
+        """
+        return BacktestIndexRow(
+            algorithm_id=self.algorithm_id,
+            tag=self.tag,
+            bundle_path=bundle_path,
+            engine_type=self.engine_type,
+            risk_free_rate=self.risk_free_rate,
+            parameters=dict(self.parameters or {}),  # shallow copy
+            strategy_ids=list(self.strategy_ids or []),  # shallow copy
+            number_of_runs=len(self.backtest_runs or []),
+            summary_metrics=self.backtest_summary,  # passed as-is, not copied
+        )
+
     def get_backtest_summary(self) -> Union[BacktestSummaryMetrics, None]:
         """
         Retrieve the cross-window BacktestSummaryMetrics roll-up for
diff --git a/investing_algorithm_framework/domain/backtesting/backtest_index_row.py b/investing_algorithm_framework/domain/backtesting/backtest_index_row.py
@@ -0,0 +1,174 @@
+"""Typed Tier-1 row contract for the tiered backtest store (epic #540).
+
+A :class:`BacktestIndexRow` is the authoritative *flat, scalar-only*
+view of a backtest. It is what gets stored as a single row in:
+
+* the :class:`BacktestIndex` Parquet sidecar produced by
+  :func:`save_backtests_to_directory`;
+* the SQLite index built by ``iaf index`` (epic #540 phase 2);
+* the Tier-1 SQL table in any tiered store implementation
+  (``LocalTieredStore`` and the closed-source remote stores).
+
+The schema is **deliberately frozen** — adding a new column is an
+explicit decision and a doc update. Callers can always stash
+non-canonical fields in :attr:`BacktestIndexRow.extras` (a
+JSON-friendly dict), which is round-tripped opaquely.
+
+This row is built without decoding any heavy time-series payloads;
+it is safe to materialise from a bundle opened with
+``Backtest.open(path, summary_only=True)``.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field, fields
+from typing import Any, Dict, List, Optional
+
+from .backtest_summary_metrics import BacktestSummaryMetrics
+
+
+# Prefix used when flattening the nested summary metrics into a
+# single-level dict (e.g. for Parquet / SQL columns). Kept as a
+# module-level constant so consumers can reuse it without hard-coding
+# the string in two places.
+SUMMARY_FIELD_PREFIX = "summary."
+
+
+@dataclass
+class BacktestIndexRow:
+    """One row of the backtest index — the Tier-1 contract.
+
+    Field groups follow the design doc
+    (``docs/design/tiered-backtest-storage.md`` §3.1):
+
+    * **Identity** — ``algorithm_id``, ``tag``, ``bundle_path``
+    * **Provenance** — ``framework_version``, ``engine_type``,
+      ``risk_free_rate``
+    * **Config** — ``parameters``, ``strategy_ids``, ``number_of_runs``
+    * **Scalar metrics** — :attr:`summary_metrics`, the existing
+      :class:`BacktestSummaryMetrics` dataclass
+    * **Forward-compat** — :attr:`extras`, a free-form dict the
+      bundle reader populates for non-canonical scalar fields
+
+    Notes:
+        The schema is intentionally flat for the wire shapes that need
+        flatness (Parquet, SQL). For ergonomic Python use, prefer
+        accessing :attr:`summary_metrics` directly.
+    """
+
+    # -- Identity --------------------------------------------------------
+    algorithm_id: Optional[str] = None
+    tag: Optional[str] = None
+    bundle_path: Optional[str] = None
+
+    # -- Provenance ------------------------------------------------------
+    framework_version: Optional[str] = None
+    engine_type: Optional[str] = None
+    risk_free_rate: Optional[float] = None
+
+    # -- Config ----------------------------------------------------------
+    parameters: Dict[str, Any] = field(default_factory=dict)
+    strategy_ids: List[Any] = field(default_factory=list)
+    number_of_runs: int = 0
+
+    # -- Scalar metrics --------------------------------------------------
+    summary_metrics: Optional[BacktestSummaryMetrics] = None
+
+    # -- Forward-compat --------------------------------------------------
+    extras: Dict[str, Any] = field(default_factory=dict)
+
+    # ------------------------------------------------------------------
+    # Flat-dict round-trip (Parquet / SQL / JSON wire shape)
+    # ------------------------------------------------------------------
+    def to_flat_dict(self) -> Dict[str, Any]:
+        """Flatten into a single-level dict.
+
+        Scalar (int/float/str/bool/None) summary metrics are emitted
+        under :data:`SUMMARY_FIELD_PREFIX` keys and scalar extras under
+        ``extras.`` keys; other values are skipped. ``parameters`` and
+        ``strategy_ids`` become JSON text, or ``None`` when empty.
+        """
+        out: Dict[str, Any] = {
+            "algorithm_id": self.algorithm_id,
+            "tag": self.tag,
+            "bundle_path": self.bundle_path,
+            "framework_version": self.framework_version,
+            "engine_type": self.engine_type,
+            "risk_free_rate": self.risk_free_rate,
+            "number_of_runs": self.number_of_runs,
+        }
+
+        # parameters / strategy_ids → JSON for tabular round-trip
+        out["parameters"] = (
+            _safe_json(self.parameters) if self.parameters else None
+        )
+        out["strategy_ids"] = (
+            _safe_json(self.strategy_ids) if self.strategy_ids else None
+        )
+
+        # Scalar summary metrics, prefixed; non-scalar values are skipped
+        if self.summary_metrics is not None:
+            for k, v in self.summary_metrics.to_dict().items():
+                if isinstance(v, (int, float, str, bool)) or v is None:
+                    out[f"{SUMMARY_FIELD_PREFIX}{k}"] = v
+
+        # Forward-compat extras, prefixed to avoid colliding with the
+        # canonical column set. Non-scalar values are skipped.
+        for k, v in (self.extras or {}).items():
+            if isinstance(v, (int, float, str, bool)) or v is None:
+                out[f"extras.{k}"] = v
+
+        return out
+
+    @classmethod
+    def from_flat_dict(cls, row: Dict[str, Any]) -> "BacktestIndexRow":
+        """Reconstruct a row from the flat dict shape produced by
+        :meth:`to_flat_dict`. Unknown keys land in :attr:`extras`."""
+        canonical = {f.name for f in fields(cls)} - {
+            "summary_metrics", "extras"
+        }
+
+        kwargs: Dict[str, Any] = {}
+        summary_dict: Dict[str, Any] = {}
+        extras: Dict[str, Any] = {}
+
+        for k, v in row.items():
+            if k in canonical:
+                if k in ("parameters", "strategy_ids"):
+                    if v is None:
+                        kwargs[k] = {} if k == "parameters" else []
+                        continue
+                    if isinstance(v, str):
+                        try:
+                            kwargs[k] = json.loads(v)
+                            continue
+                        except (TypeError, ValueError):
+                            pass  # not valid JSON: keep the raw string
+                kwargs[k] = v
+            elif k.startswith(SUMMARY_FIELD_PREFIX):
+                summary_dict[k[len(SUMMARY_FIELD_PREFIX):]] = v
+            elif k.startswith("extras."):
+                extras[k[len("extras."):]] = v
+            else:
+                # Unknown key — preserve under extras (round-trip safety).
+                extras[k] = v
+
+        kwargs.setdefault("parameters", {})
+        kwargs.setdefault("strategy_ids", [])
+
+        return cls(
+            **kwargs,
+            summary_metrics=(
+                BacktestSummaryMetrics.from_dict(summary_dict)
+                if summary_dict else None
+            ),
+            extras=extras,
+        )
+
+
+def _safe_json(obj: Any) -> Optional[str]:
+    try:
+        return json.dumps(obj, default=str)  # str() anything non-JSON
+    except (TypeError, ValueError):
+        return None  # best-effort: unencodable input yields None
diff --git a/investing_algorithm_framework/domain/backtesting/backtest_utils.py b/investing_algorithm_framework/domain/backtesting/backtest_utils.py
@@ -551,28 +551,14 @@ def iter_backtests_from_directory(
 
 
 def _backtest_to_index_row(bt: Backtest, bundle_path: Optional[str] = None):
-    """Flatten a backtest's summary + identity into a single row."""
-    summary = (
-        bt.backtest_summary.to_dict() if bt.backtest_summary else {}
-    )
-    row = {
-        "algorithm_id": getattr(bt, "algorithm_id", None),
-        "tag": getattr(bt, "tag", None),
-        "risk_free_rate": getattr(bt, "risk_free_rate", None),
-        "bundle_path": bundle_path,
-        "number_of_runs": len(bt.backtest_runs or []),
-    }
-    # Include scalar summary metrics only (no nested structures).
-    for k, v in summary.items():
-        if isinstance(v, (int, float, str, bool)) or v is None:
-            row[f"summary.{k}"] = v
-    # Parameters as JSON for round-trippability without exploding columns.
-    if getattr(bt, "parameters", None):
-        try:
-            row["parameters"] = json.dumps(bt.parameters, default=str)
-        except (TypeError, ValueError):
-            row["parameters"] = None
-    return row
+    """Flatten a backtest's summary + identity into a single row.
+
+    Thin wrapper around :meth:`Backtest.index_row` for callers that
+    want the legacy flat dict shape (Parquet / SQL columns). The
+    typed :class:`BacktestIndexRow` is the authoritative contract —
+    see ``docs/design/tiered-backtest-storage.md`` §3.1.
+    """
+    return bt.index_row(bundle_path=bundle_path).to_flat_dict()
 
 
 def _write_index(directory_path: Union[str, Path], backtests: List[Backtest]):
diff --git a/tests/domain/backtests/test_index_row.py b/tests/domain/backtests/test_index_row.py