Skip to content

Commit b6ebab0

Browse files
committed
feat(store): iaf migrate-store + dual-store contract suite (epic #540 phase 3d)
Closes the open Phase 3 deliverables that turn the new store abstraction into something users can actually move data through: - iaf migrate-store --from <kind> --src <path> --to <kind> --dst <path> delegates to dst.copy_from(src), so it is incremental, restartable, and tier-aware: when the destination is a local-tiered store, identical OHLCV chunks are written exactly once across the destination regardless of how many bundles reference them (Phase 3c invariant). Optional --handles subset selector for partial migrations. - migrate_store() programmatic helper for in-process pipelines. - BacktestStoreContractTest: a parameterised conformance suite that runs identical scenarios against every concrete store implementation (LocalDirStore, LocalTieredStore today, future remote stores tomorrow). Catches divergence as a failing subTest with the store class name in the label. Covers Protocol + SupportsCopyFrom conformance, write/open round-trip, summary_only, exists, idempotent delete, missing-handle errors, listing, iter_index_rows, and copy_from with both full and subset handle selection. - bug fix in LazyOhlcvDict: items() and values() were inheriting the empty backing dict's iteration, so any code path that did 'for k, v in bt.ohlcv.items()' silently dropped every blob after a tiered round-trip. Now both methods walk the manifest and materialise lazily on access. Caught by the migration dedup test. Note on what is *not* in this PR: byte-identical Tier-2 -> Backtest reassembly (so .iafbt could become export-only) is intentionally deferred. The current model where the bundle is canonical and Tier-1/2/3 are derived is simpler, preserves the existing round-trip contract bit-for-bit, and is what every test in the contract suite already exercises against both stores. Targeted suite (backtest_store + backtest_index + cli): 128 / 128 passing + 26 subTests. Full non-scenario suite: 1705 / 1705 passing with no regressions from the LazyOhlcvDict fix.
1 parent 0950f1b commit b6ebab0

5 files changed

Lines changed: 509 additions & 0 deletions

File tree

investing_algorithm_framework/cli/cli.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,78 @@ def migrate_backtests_cmd(
322322
cli.add_command(migrate_backtests_cmd)
323323

324324

325+
_STORE_KINDS = ["local-dir", "local-tiered"]
326+
327+
328+
@click.command(name="migrate-store")
329+
@click.option(
330+
"--from", "src_kind",
331+
type=click.Choice(_STORE_KINDS),
332+
required=True,
333+
help="Source store kind.",
334+
)
335+
@click.option(
336+
"--src",
337+
type=click.Path(exists=True, file_okay=False, dir_okay=True),
338+
required=True,
339+
help="Path to the source store root.",
340+
)
341+
@click.option(
342+
"--to", "dst_kind",
343+
type=click.Choice(_STORE_KINDS),
344+
required=True,
345+
help="Destination store kind.",
346+
)
347+
@click.option(
348+
"--dst",
349+
type=click.Path(file_okay=False, dir_okay=True),
350+
required=True,
351+
help="Path to the destination store root (created if missing).",
352+
)
353+
@click.option(
354+
"--handles",
355+
default=None,
356+
help=(
357+
"Optional comma-separated subset of source handles to copy. "
358+
"When omitted, every handle is copied."
359+
),
360+
)
361+
def migrate_store_cmd(src_kind, src, dst_kind, dst, handles):
362+
"""Copy backtests between two :class:`BacktestStore` implementations.
363+
364+
Uses the destination's :class:`SupportsCopyFrom` capability so the
365+
operation is incremental, restartable, and tier-aware: when copying
366+
into a ``local-tiered`` store, identical OHLCV chunks are written
367+
exactly once across the entire destination, regardless of how many
368+
bundles reference them (epic #540 phase 3c).
369+
370+
Example::
371+
372+
iaf migrate-store --from local-dir --src ./bt-old \\
373+
--to local-tiered --dst ./bt-new
374+
"""
375+
from .migrate_store_command import migrate_store
376+
377+
handle_list = (
378+
[h.strip() for h in handles.split(",") if h.strip()]
379+
if handles else None
380+
)
381+
n = migrate_store(
382+
src_kind=src_kind,
383+
src_root=src,
384+
dst_kind=dst_kind,
385+
dst_root=dst,
386+
handles=handle_list,
387+
)
388+
click.echo(
389+
f"Migrated {n} backtest(s) from {src_kind}:{src} "
390+
f"to {dst_kind}:{dst}"
391+
)
392+
393+
394+
cli.add_command(migrate_store_cmd)
395+
396+
325397
@click.command(name="index")
326398
@click.argument(
327399
"directory",
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""``iaf migrate-store`` — copy backtests between :class:`BacktestStore`
2+
implementations using the :class:`SupportsCopyFrom` capability.
3+
4+
Phase 3d of epic #540.
5+
"""
6+
from __future__ import annotations
7+
8+
from pathlib import Path
9+
from typing import Optional
10+
11+
from investing_algorithm_framework.services.backtest_store import (
12+
BacktestStore,
13+
LocalDirStore,
14+
LocalTieredStore,
15+
SupportsCopyFrom,
16+
)
17+
18+
19+
_STORE_FACTORIES = {
20+
"local-dir": lambda root: LocalDirStore(root),
21+
"local-tiered": lambda root: LocalTieredStore(root),
22+
}
23+
24+
25+
def _build_store(kind: str, root: str) -> BacktestStore:
26+
if kind not in _STORE_FACTORIES:
27+
raise ValueError(
28+
f"unknown store kind {kind!r}; expected one of "
29+
f"{sorted(_STORE_FACTORIES)}"
30+
)
31+
return _STORE_FACTORIES[kind](root)
32+
33+
34+
def migrate_store(
35+
*,
36+
src_kind: str,
37+
src_root: str,
38+
dst_kind: str,
39+
dst_root: str,
40+
handles: Optional[list] = None,
41+
) -> int:
42+
"""Copy backtests from one store to another.
43+
44+
Args:
45+
src_kind: One of ``"local-dir"``, ``"local-tiered"``.
46+
src_root: Path to the source store root.
47+
dst_kind: Same vocabulary as ``src_kind``.
48+
dst_root: Path to the destination store root.
49+
handles: Optional subset of source handles to copy. When
50+
``None`` every handle in the source is copied.
51+
52+
Returns:
53+
The number of backtests written to the destination.
54+
55+
Raises:
56+
TypeError: When the destination store does not implement
57+
:class:`SupportsCopyFrom`.
58+
"""
59+
src = _build_store(src_kind, src_root)
60+
Path(dst_root).mkdir(parents=True, exist_ok=True)
61+
dst = _build_store(dst_kind, dst_root)
62+
if not isinstance(dst, SupportsCopyFrom):
63+
raise TypeError(
64+
f"destination store {dst_kind!r} does not implement "
65+
f"SupportsCopyFrom"
66+
)
67+
return dst.copy_from(src, handles=handles)

investing_algorithm_framework/domain/backtesting/bundle.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,14 @@ def __len__(self) -> int:
318318
def keys(self): # type: ignore[override]
319319
return self._manifest.keys()
320320

321+
def values(self): # type: ignore[override]
322+
for k in self._manifest:
323+
yield self[k]
324+
325+
def items(self): # type: ignore[override]
326+
for k in self._manifest:
327+
yield k, self[k]
328+
321329
def __getitem__(self, key: str):
322330
if key in self._cache:
323331
return self._cache[key]
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Tests for ``iaf migrate-store`` (epic #540 phase 3d)."""
2+
from __future__ import annotations
3+
4+
import os
5+
import shutil
6+
import tempfile
7+
from copy import deepcopy
8+
from pathlib import Path
9+
from unittest import TestCase
10+
11+
import pandas as pd
12+
from click.testing import CliRunner
13+
14+
from investing_algorithm_framework.cli.cli import migrate_store_cmd
15+
from investing_algorithm_framework.cli.migrate_store_command import (
16+
migrate_store,
17+
)
18+
from investing_algorithm_framework.domain import Backtest
19+
from investing_algorithm_framework.services.backtest_store import (
20+
LocalDirStore,
21+
LocalTieredStore,
22+
)
23+
24+
25+
_FIXTURE = os.path.join(
26+
os.path.dirname(os.path.dirname(__file__)),
27+
"resources",
28+
"backtest_reports_for_testing",
29+
"test_algorithm_backtest",
30+
)
31+
32+
33+
def _make_ohlcv(symbol: str, *, n: int = 4, base: float = 100.0) -> dict:
34+
idx = pd.date_range("2024-01-01", periods=n, freq="h")
35+
df = pd.DataFrame(
36+
{
37+
"Datetime": idx,
38+
"Open": [base + i for i in range(n)],
39+
"High": [base + i + 0.5 for i in range(n)],
40+
"Low": [base + i - 0.5 for i in range(n)],
41+
"Close": [base + i + 0.25 for i in range(n)],
42+
"Volume": [1000 + i for i in range(n)],
43+
}
44+
)
45+
return {f"{symbol}:1h": df}
46+
47+
48+
class TestMigrateStore(TestCase):
49+
50+
@classmethod
51+
def setUpClass(cls):
52+
cls.fixture = Backtest.open(_FIXTURE)
53+
54+
def setUp(self):
55+
self.src_dir = tempfile.mkdtemp()
56+
self.dst_dir = tempfile.mkdtemp()
57+
58+
def tearDown(self):
59+
shutil.rmtree(self.src_dir, ignore_errors=True)
60+
shutil.rmtree(self.dst_dir, ignore_errors=True)
61+
62+
# ------------------------------------------------------------------
63+
# Programmatic API
64+
# ------------------------------------------------------------------
65+
def test_local_dir_to_local_tiered(self):
66+
src = LocalDirStore(self.src_dir)
67+
src.write(self.fixture, handle="a")
68+
src.write(self.fixture, handle="b")
69+
70+
n = migrate_store(
71+
src_kind="local-dir", src_root=self.src_dir,
72+
dst_kind="local-tiered", dst_root=self.dst_dir,
73+
)
74+
self.assertEqual(n, 2)
75+
76+
dst = LocalTieredStore(self.dst_dir)
77+
self.assertEqual(len(dst), 2)
78+
self.assertTrue(dst.exists("a"))
79+
self.assertTrue(dst.exists("b"))
80+
# Tier-1 was populated.
81+
self.assertTrue((Path(self.dst_dir) / "index.sqlite").is_file())
82+
83+
def test_handles_subset(self):
84+
src = LocalDirStore(self.src_dir)
85+
src.write(self.fixture, handle="a")
86+
src.write(self.fixture, handle="b")
87+
src.write(self.fixture, handle="c")
88+
89+
n = migrate_store(
90+
src_kind="local-dir", src_root=self.src_dir,
91+
dst_kind="local-tiered", dst_root=self.dst_dir,
92+
handles=["a", "c"],
93+
)
94+
self.assertEqual(n, 2)
95+
dst = LocalTieredStore(self.dst_dir)
96+
self.assertSetEqual(
97+
set(dst.iter_handles()), {"a", "c"},
98+
)
99+
100+
def test_ohlcv_dedup_during_migration(self):
101+
# Source is a tiered store (preserves OHLCV); destination is
102+
# a fresh tiered store. After migration, identical OHLCV must
103+
# collapse to a single chunk on the destination too.
104+
src = LocalTieredStore(self.src_dir)
105+
bt1 = deepcopy(self.fixture)
106+
bt1.ohlcv = _make_ohlcv("BTC/EUR")
107+
bt2 = deepcopy(self.fixture)
108+
bt2.ohlcv = _make_ohlcv("BTC/EUR")
109+
src.write(bt1, handle="a")
110+
src.write(bt2, handle="b")
111+
# Sanity: src already deduped to one chunk.
112+
self.assertEqual(src.ohlcv_stats()["stored_blobs"], 1)
113+
114+
n = migrate_store(
115+
src_kind="local-tiered", src_root=self.src_dir,
116+
dst_kind="local-tiered", dst_root=self.dst_dir,
117+
)
118+
self.assertEqual(n, 2)
119+
120+
# Phase 3c invariant: identical OHLCV stored once even though
121+
# two bundles reference it.
122+
dst = LocalTieredStore(self.dst_dir)
123+
self.assertEqual(dst.ohlcv_stats()["stored_blobs"], 1)
124+
self.assertEqual(dst.ohlcv_stats()["referenced_blobs"], 1)
125+
126+
def test_unknown_kind_raises(self):
127+
with self.assertRaises(ValueError):
128+
migrate_store(
129+
src_kind="local-dir", src_root=self.src_dir,
130+
dst_kind="bogus", dst_root=self.dst_dir,
131+
)
132+
133+
# ------------------------------------------------------------------
134+
# CLI
135+
# ------------------------------------------------------------------
136+
def test_cli_round_trip(self):
137+
src = LocalDirStore(self.src_dir)
138+
src.write(self.fixture, handle="cli_a")
139+
src.write(self.fixture, handle="cli_b")
140+
141+
runner = CliRunner()
142+
result = runner.invoke(
143+
migrate_store_cmd,
144+
[
145+
"--from", "local-dir", "--src", self.src_dir,
146+
"--to", "local-tiered", "--dst", self.dst_dir,
147+
],
148+
)
149+
self.assertEqual(result.exit_code, 0, result.output)
150+
self.assertIn("Migrated 2 backtest(s)", result.output)
151+
152+
dst = LocalTieredStore(self.dst_dir)
153+
self.assertEqual(len(dst), 2)
154+
155+
def test_cli_handles_subset(self):
156+
src = LocalDirStore(self.src_dir)
157+
for h in ("h1", "h2", "h3"):
158+
src.write(self.fixture, handle=h)
159+
160+
runner = CliRunner()
161+
result = runner.invoke(
162+
migrate_store_cmd,
163+
[
164+
"--from", "local-dir", "--src", self.src_dir,
165+
"--to", "local-tiered", "--dst", self.dst_dir,
166+
"--handles", "h1,h3",
167+
],
168+
)
169+
self.assertEqual(result.exit_code, 0, result.output)
170+
self.assertIn("Migrated 2 backtest(s)", result.output)
171+
dst = LocalTieredStore(self.dst_dir)
172+
self.assertSetEqual(set(dst.iter_handles()), {"h1", "h3"})
173+
174+
def test_cli_rejects_unknown_kind(self):
175+
runner = CliRunner()
176+
result = runner.invoke(
177+
migrate_store_cmd,
178+
[
179+
"--from", "bogus", "--src", self.src_dir,
180+
"--to", "local-tiered", "--dst", self.dst_dir,
181+
],
182+
)
183+
# Click choice validation rejects with non-zero exit.
184+
self.assertNotEqual(result.exit_code, 0)

0 commit comments

Comments
 (0)