diff --git a/investing_algorithm_framework/services/backtest_store/__init__.py b/investing_algorithm_framework/services/backtest_store/__init__.py new file mode 100644 index 00000000..27268927 --- /dev/null +++ b/investing_algorithm_framework/services/backtest_store/__init__.py @@ -0,0 +1,30 @@ +"""Storage abstraction for backtests (epic #540 phase 3). + +A :class:`BacktestStore` is the single seam between the framework and +*where* backtest results actually live. Phase 3a ships the Protocol +and a thin :class:`LocalDirStore` adapter over the existing ``.iafbt`` +layout, so every consumer (HTML report, ``iaf list/rank``, the MCP +server, future ``FinterionStore``) can be written against one +interface. + +See ``docs/design/tiered-backtest-storage.md`` §7 and the Phase 3 +plan in epic #540 for the full architecture. +""" + +from .base import ( + BacktestStore, + StoreHandle, + StoreError, + StoreHandleNotFoundError, + SupportsCopyFrom, +) +from .local_dir_store import LocalDirStore + +__all__ = [ + "BacktestStore", + "StoreHandle", + "StoreError", + "StoreHandleNotFoundError", + "SupportsCopyFrom", + "LocalDirStore", +] diff --git a/investing_algorithm_framework/services/backtest_store/base.py b/investing_algorithm_framework/services/backtest_store/base.py new file mode 100644 index 00000000..9faec300 --- /dev/null +++ b/investing_algorithm_framework/services/backtest_store/base.py @@ -0,0 +1,157 @@ +"""``BacktestStore`` Protocol + capability mixins (epic #540 phase 3a). + +A :class:`BacktestStore` decouples *where* a backtest is persisted +from the rest of the framework. Three concrete implementations are +planned: + +* :class:`LocalDirStore` (this PR) — directory of ``.iafbt`` bundles. + Adapter over today's :meth:`Backtest.save_bundle` / + :meth:`Backtest.open` so existing layouts keep working unchanged. +* ``LocalTieredStore`` (Phase 3b/3c) — SQLite Tier-1 + per-project + Parquet datasets (Tier-2) + content-addressed chunks (Tier-3). +* ``FinterionStore`` (closed-source) — HTTP adapter over Finterion's + hosted tiered backend, with the optional :class:`SupportsRelations` + capability for strategy-version / report linkage. + +The Protocol stays deliberately small. Capabilities that not every +store can or should implement (efficient bulk migration, relational +graph queries, …) are declared as separate Protocols so callers can +``isinstance(store, SupportsCopyFrom)``-test for them at runtime. +""" + +from __future__ import annotations + +from typing import ( + Iterable, + Iterator, + Optional, + Protocol, + runtime_checkable, +) + +from investing_algorithm_framework.domain import ( + Backtest, + BacktestIndexRow, +) + + +# A handle is an opaque, store-scoped, stable string identifier for a +# single backtest record. For ``LocalDirStore`` it is the bundle path +# relative to the store root; for ``LocalTieredStore`` it will be the +# ``run_id`` (uuid7); for ``FinterionStore`` a remote URI. Callers +# should treat handles as opaque tokens — never parse them. +StoreHandle = str + + +class StoreError(Exception): + """Base class for all :class:`BacktestStore` errors.""" + + +class StoreHandleNotFoundError(StoreError, KeyError): + """Raised when an operation references a handle that does not exist.""" + + +@runtime_checkable +class BacktestStore(Protocol): + """Minimal write/read/list/delete contract for backtest storage. + + All implementations must be safe for concurrent reads. Concurrent + writes are implementation-specific (``LocalDirStore`` allows + them; tiered stores will document their guarantees). + + The contract intentionally mirrors today's + :meth:`Backtest.save_bundle` / :meth:`Backtest.open` semantics so + :class:`LocalDirStore` is a 1:1 adapter and existing tests keep + passing without behavioural drift. + """ + + def write( + self, + backtest: Backtest, + *, + handle: Optional[StoreHandle] = None, + ) -> StoreHandle: + """Persist *backtest* and return its handle. + + If *handle* is supplied the store should write to (or replace) + that exact location; otherwise the store picks a deterministic + handle from the backtest's identity (e.g. ``algorithm_id`` for + local stores, ``run_id`` for tiered stores). + """ + ... + + def open( + self, + handle: StoreHandle, + *, + summary_only: bool = False, + ) -> Backtest: + """Materialise the backtest at *handle*. + + ``summary_only`` mirrors :meth:`Backtest.open`: when True the + store should avoid decoding heavy time-series payloads (the + Tier-2 Parquet bodies, in tiered terminology). + """ + ... + + def exists(self, handle: StoreHandle) -> bool: + """Return True if *handle* refers to a stored backtest.""" + ... + + def delete(self, handle: StoreHandle) -> None: + """Remove the backtest at *handle*. No-op if absent.""" + ... + + def iter_handles(self) -> Iterator[StoreHandle]: + """Yield every handle currently in the store, in stable order.""" + ... + + def iter_index_rows(self) -> Iterator[BacktestIndexRow]: + """Yield a :class:`BacktestIndexRow` for every stored backtest. + + Implementations that have a Tier-1 index should serve this + from the index (no bulk decode); :class:`LocalDirStore` + falls back to ``Backtest.open(..., summary_only=True)`` per + bundle when no sidecar index is present. + """ + ... + + def __len__(self) -> int: + """Number of backtests currently in the store.""" + ... + + +# --------------------------------------------------------------------------- +# Optional capabilities — declared as separate Protocols so callers can +# feature-test with isinstance(store, SupportsXxx). +# --------------------------------------------------------------------------- + + +@runtime_checkable +class SupportsCopyFrom(Protocol): + """Stores that can ingest from another :class:`BacktestStore`. + + Used by ``iaf migrate-store`` (Phase 3d) to move bundles between + a :class:`LocalDirStore` and a ``LocalTieredStore`` (or to push + to ``FinterionStore``). Implementations may optimise — e.g. a + tiered store can dedup chunks during ingest — but the default + fallback is a per-handle ``write(src.open(h))`` loop. + """ + + def copy_from( + self, + src: "BacktestStore", + *, + handles: Optional[Iterable[StoreHandle]] = None, + ) -> int: + """Copy backtests from *src* into this store. + + Args: + src: source store to read from. + handles: optional subset of handles to copy. If None, all + handles in *src* are copied. + + Returns: + Number of backtests successfully copied. + """ + ... diff --git a/investing_algorithm_framework/services/backtest_store/local_dir_store.py b/investing_algorithm_framework/services/backtest_store/local_dir_store.py new file mode 100644 index 00000000..d1208a3b --- /dev/null +++ b/investing_algorithm_framework/services/backtest_store/local_dir_store.py @@ -0,0 +1,270 @@ +"""``LocalDirStore`` — directory-of-bundles :class:`BacktestStore`. + +Thin adapter over the existing ``.iafbt`` storage layout. Every +backtest is one bundle file under the store's *root* directory. +Optionally maintains a sidecar :class:`SqliteBacktestIndex` to serve +:meth:`iter_index_rows` without re-decoding bundles on every call. + +This is the Phase-3a default; it preserves today's on-disk layout +exactly so existing consumers (HTML report, ``iaf list/rank``, the +MCP server) work unchanged. +""" + +from __future__ import annotations + +import logging +import os +import shutil +from pathlib import Path +from typing import Iterable, Iterator, Optional, Union + +from investing_algorithm_framework.domain import ( + Backtest, + BacktestIndexRow, + BUNDLE_EXT, +) +from investing_algorithm_framework.services.backtest_index import ( + SqliteBacktestIndex, +) + +from .base import ( + BacktestStore, + StoreError, + StoreHandle, + StoreHandleNotFoundError, +) + +logger = logging.getLogger(__name__) + + +SIDECAR_INDEX_NAME = "index.sqlite" + + +class LocalDirStore(BacktestStore): + """A directory of ``.iafbt`` bundles, addressable by relative path. + + Handles are bundle paths *relative to the store root* — e.g. + ``"my_strategy.iafbt"`` or ``"sweep_a/run_03.iafbt"``. Relative + handles keep the store portable: moving the root directory does + not invalidate any handle. + + The store optionally maintains a sidecar + ``/index.sqlite`` (built lazily on the first call to + :meth:`iter_index_rows` when ``use_index=True``) so listing / + ranking does not have to re-open every bundle. + """ + + def __init__( + self, + root: Union[str, Path], + *, + use_index: bool = True, + ) -> None: + """ + Args: + root: directory that holds (or will hold) the bundles. + Created if it does not exist. + use_index: when True, :meth:`iter_index_rows` is served + from a sidecar :class:`SqliteBacktestIndex`. Disable + for one-shot scripts that never list. + """ + self.root = Path(root).resolve() + self.root.mkdir(parents=True, exist_ok=True) + self._use_index = use_index + + # ------------------------------------------------------------------ + # internal helpers + # ------------------------------------------------------------------ + def _resolve(self, handle: StoreHandle) -> Path: + """Map *handle* to an absolute path under :attr:`root`. + + Rejects handles that escape the store root (path traversal). + """ + candidate = (self.root / handle).resolve() + try: + candidate.relative_to(self.root) + except ValueError as exc: + raise StoreError( + f"handle {handle!r} resolves outside the store root " + f"{self.root}" + ) from exc + return candidate + + @staticmethod + def _ensure_bundle_suffix(handle: StoreHandle) -> StoreHandle: + if handle.endswith(BUNDLE_EXT): + return handle + return handle + BUNDLE_EXT + + def _default_handle(self, backtest: Backtest) -> StoreHandle: + """Pick a deterministic handle when the caller didn't supply one. + + Uses ``algorithm_id`` if set, otherwise falls back to a + timestamp-derived name. The choice is deliberate: the + Phase 3a contract is "behaves like ``Backtest.save_bundle``"; + callers who need stronger identity (``run_id``, content + hash, …) should pass an explicit handle. + """ + if backtest.algorithm_id: + stem = str(backtest.algorithm_id) + else: + from datetime import datetime, timezone + stem = "backtest_" + datetime.now(timezone.utc).strftime( + "%Y%m%dT%H%M%S%f" + ) + return self._ensure_bundle_suffix(stem) + + def _iter_bundle_paths(self) -> Iterator[Path]: + for p in sorted(self.root.rglob(f"*{BUNDLE_EXT}")): + if p.is_file(): + yield p + + # ------------------------------------------------------------------ + # BacktestStore API + # ------------------------------------------------------------------ + def write( + self, + backtest: Backtest, + *, + handle: Optional[StoreHandle] = None, + ) -> StoreHandle: + if handle is None: + handle = self._default_handle(backtest) + else: + handle = self._ensure_bundle_suffix(handle) + target = self._resolve(handle) + target.parent.mkdir(parents=True, exist_ok=True) + backtest.save_bundle(target) + return handle + + def open( + self, + handle: StoreHandle, + *, + summary_only: bool = False, + ) -> Backtest: + handle = self._ensure_bundle_suffix(handle) + target = self._resolve(handle) + if not target.is_file(): + raise StoreHandleNotFoundError(handle) + return Backtest.open(str(target), summary_only=summary_only) + + def exists(self, handle: StoreHandle) -> bool: + try: + target = self._resolve(self._ensure_bundle_suffix(handle)) + except StoreError: + return False + return target.is_file() + + def delete(self, handle: StoreHandle) -> None: + handle = self._ensure_bundle_suffix(handle) + try: + target = self._resolve(handle) + except StoreError: + return + if target.is_file(): + target.unlink() + elif target.is_dir(): + # Shouldn't happen for .iafbt, but tolerate legacy + # directory-style bundles for symmetry. + shutil.rmtree(target) + + def iter_handles(self) -> Iterator[StoreHandle]: + for p in self._iter_bundle_paths(): + yield str(p.relative_to(self.root)) + + def __len__(self) -> int: + return sum(1 for _ in self._iter_bundle_paths()) + + def __contains__(self, handle: object) -> bool: + if not isinstance(handle, str): + return False + return self.exists(handle) + + # ------------------------------------------------------------------ + # iter_index_rows — backed by a sidecar SqliteBacktestIndex when + # ``use_index=True`` (the default), otherwise a per-call decode. + # ------------------------------------------------------------------ + def iter_index_rows(self) -> Iterator[BacktestIndexRow]: + if not self._use_index: + yield from self._iter_index_rows_decoded() + return + + index_path = self.root / SIDECAR_INDEX_NAME + # Build or refresh incrementally — same machinery as `iaf index`. + index = ( + SqliteBacktestIndex.open(index_path) + if index_path.is_file() + else SqliteBacktestIndex.create(index_path) + ) + try: + for path in self._iter_bundle_paths(): + stat = path.stat() + rel = str(path.relative_to(self.root)) + if not index.is_up_to_date( + rel, stat.st_mtime_ns, stat.st_size, + ): + try: + bt = Backtest.open(str(path), summary_only=True) + row = bt.index_row(bundle_path=rel) + index.upsert( + row, + bundle_mtime_ns=stat.st_mtime_ns, + bundle_size=stat.st_size, + ) + except Exception as exc: # pragma: no cover - logged + logger.warning( + "Skipping bundle %s while building index: %s", + path, exc, + ) + continue + yield from index.iter_rows() + finally: + index.close() + + def _iter_index_rows_decoded(self) -> Iterator[BacktestIndexRow]: + for path in self._iter_bundle_paths(): + rel = str(path.relative_to(self.root)) + try: + bt = Backtest.open(str(path), summary_only=True) + except Exception as exc: # pragma: no cover - logged + logger.warning( + "Skipping bundle %s while listing: %s", path, exc, + ) + continue + yield bt.index_row(bundle_path=rel) + + # ------------------------------------------------------------------ + # SupportsCopyFrom + # ------------------------------------------------------------------ + def copy_from( + self, + src: BacktestStore, + *, + handles: Optional[Iterable[StoreHandle]] = None, + ) -> int: + n = 0 + chosen = list(handles) if handles is not None else list( + src.iter_handles() + ) + for h in chosen: + try: + bt = src.open(h) + except StoreHandleNotFoundError: + logger.warning("copy_from: handle %r not in source", h) + continue + self.write(bt, handle=h) + n += 1 + return n + + # ------------------------------------------------------------------ + # Convenience + # ------------------------------------------------------------------ + def __repr__(self) -> str: + return ( + f"LocalDirStore(root={str(self.root)!r}, " + f"use_index={self._use_index})" + ) + + def __fspath__(self) -> str: + return os.fspath(self.root) diff --git a/tests/services/backtest_store/__init__.py b/tests/services/backtest_store/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/services/backtest_store/test_local_dir_store.py b/tests/services/backtest_store/test_local_dir_store.py new file mode 100644 index 00000000..4f091609 --- /dev/null +++ b/tests/services/backtest_store/test_local_dir_store.py @@ -0,0 +1,186 @@ +"""Tests for the :class:`BacktestStore` Protocol and :class:`LocalDirStore` +adapter (epic #540 phase 3a).""" +from __future__ import annotations + +import os +import shutil +import tempfile +from pathlib import Path +from unittest import TestCase + +from investing_algorithm_framework.domain import ( + Backtest, + BacktestIndexRow, + BUNDLE_EXT, +) +from investing_algorithm_framework.services.backtest_store import ( + BacktestStore, + LocalDirStore, + StoreError, + StoreHandleNotFoundError, + SupportsCopyFrom, +) + + +_FIXTURE = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "resources", + "backtest_reports_for_testing", + "test_algorithm_backtest", +) + + +class TestLocalDirStore(TestCase): + + @classmethod + def setUpClass(cls): + cls.fixture = Backtest.open(_FIXTURE) + + def setUp(self): + self.tmp = tempfile.mkdtemp() + self.store = LocalDirStore(self.tmp) + + def tearDown(self): + shutil.rmtree(self.tmp, ignore_errors=True) + + # ------------------------------------------------------------------ + # Protocol conformance + # ------------------------------------------------------------------ + def test_implements_protocol(self): + self.assertIsInstance(self.store, BacktestStore) + + def test_implements_supports_copy_from(self): + self.assertIsInstance(self.store, SupportsCopyFrom) + + # ------------------------------------------------------------------ + # write / open / exists / delete + # ------------------------------------------------------------------ + def test_write_returns_handle_and_creates_bundle_file(self): + handle = self.store.write(self.fixture, handle="run_a") + self.assertEqual(handle, "run_a" + BUNDLE_EXT) + self.assertTrue((Path(self.tmp) / handle).is_file()) + + def test_write_default_handle_uses_algorithm_id_when_set(self): + handle = self.store.write(self.fixture) + # Fixture has an algorithm_id; the handle must derive from it. + self.assertTrue(handle.endswith(BUNDLE_EXT)) + if self.fixture.algorithm_id: + self.assertIn(str(self.fixture.algorithm_id), handle) + + def test_open_round_trip_matches_algorithm_id(self): + handle = self.store.write(self.fixture, handle="rt") + loaded = self.store.open(handle) + self.assertEqual(loaded.algorithm_id, self.fixture.algorithm_id) + + def test_open_summary_only_skips_bulk_decode(self): + handle = self.store.write(self.fixture, handle="so") + loaded = self.store.open(handle, summary_only=True) + self.assertEqual(loaded.algorithm_id, self.fixture.algorithm_id) + + def test_exists_true_after_write_false_after_delete(self): + handle = self.store.write(self.fixture, handle="ed") + self.assertTrue(self.store.exists(handle)) + self.assertIn(handle, self.store) + self.store.delete(handle) + self.assertFalse(self.store.exists(handle)) + self.assertNotIn(handle, self.store) + + def test_open_missing_handle_raises(self): + with self.assertRaises(StoreHandleNotFoundError): + self.store.open("nope.iafbt") + + def test_delete_missing_handle_is_noop(self): + # Must not raise. + self.store.delete("never_existed.iafbt") + + # ------------------------------------------------------------------ + # iter_handles / __len__ / iter_index_rows + # ------------------------------------------------------------------ + def test_iter_handles_and_len(self): + self.assertEqual(len(self.store), 0) + self.store.write(self.fixture, handle="a") + self.store.write(self.fixture, handle="sub/b") + self.assertEqual(len(self.store), 2) + handles = sorted(self.store.iter_handles()) + self.assertIn("a" + BUNDLE_EXT, handles) + self.assertIn(os.path.join("sub", "b" + BUNDLE_EXT), handles) + + def test_iter_index_rows_yields_typed_rows_with_relative_paths(self): + self.store.write(self.fixture, handle="r1") + self.store.write(self.fixture, handle="r2") + rows = list(self.store.iter_index_rows()) + self.assertEqual(len(rows), 2) + for row in rows: + self.assertIsInstance(row, BacktestIndexRow) + self.assertIsNotNone(row.bundle_path) + # bundle_path is relative to root, so it must not be absolute. + self.assertFalse(os.path.isabs(row.bundle_path)) + + def test_iter_index_rows_creates_sidecar_index(self): + self.store.write(self.fixture, handle="cached") + list(self.store.iter_index_rows()) + sidecar = Path(self.tmp) / "index.sqlite" + self.assertTrue(sidecar.is_file()) + + def test_iter_index_rows_without_index_works(self): + nostore = LocalDirStore(self.tmp, use_index=False) + nostore.write(self.fixture, handle="nox") + rows = list(nostore.iter_index_rows()) + self.assertEqual(len(rows), 1) + self.assertFalse((Path(self.tmp) / "index.sqlite").is_file()) + + # ------------------------------------------------------------------ + # security: handle escape attempts + # ------------------------------------------------------------------ + def test_handle_escape_rejected(self): + with self.assertRaises(StoreError): + self.store.write(self.fixture, handle="../escape") + + def test_absolute_handle_rejected(self): + # Absolute paths get re-anchored by Path() to the abs root and + # then escape the store root, so they must be rejected. + with self.assertRaises(StoreError): + self.store.write(self.fixture, handle="/etc/passwd") + + # ------------------------------------------------------------------ + # SupportsCopyFrom + # ------------------------------------------------------------------ + def test_copy_from_other_store(self): + src = LocalDirStore(tempfile.mkdtemp()) + try: + src.write(self.fixture, handle="x") + src.write(self.fixture, handle="y") + n = self.store.copy_from(src) + self.assertEqual(n, 2) + self.assertEqual(len(self.store), 2) + self.assertTrue(self.store.exists("x")) + self.assertTrue(self.store.exists("y")) + finally: + shutil.rmtree(src.root, ignore_errors=True) + + def test_copy_from_with_handle_subset(self): + src = LocalDirStore(tempfile.mkdtemp()) + try: + src.write(self.fixture, handle="keep") + src.write(self.fixture, handle="skip") + n = self.store.copy_from(src, handles=["keep"]) + self.assertEqual(n, 1) + self.assertTrue(self.store.exists("keep")) + self.assertFalse(self.store.exists("skip")) + finally: + shutil.rmtree(src.root, ignore_errors=True) + + # ------------------------------------------------------------------ + # ergonomics + # ------------------------------------------------------------------ + def test_handle_normalization_adds_bundle_suffix(self): + # Caller may omit the .iafbt suffix and the store adds it. + self.store.write(self.fixture, handle="no_suffix") + self.assertTrue(self.store.exists("no_suffix")) + self.assertTrue(self.store.exists("no_suffix" + BUNDLE_EXT)) + + def test_root_is_created_if_missing(self): + new_root = os.path.join(self.tmp, "deep", "nested", "store") + s = LocalDirStore(new_root) + self.assertTrue(Path(new_root).is_dir()) + self.assertEqual(len(s), 0)