Skip to content

Commit 84ccf1f

Browse files
authored
Merge pull request #542 from coding-kitties/feature/iaf-index-cli
feat(cli): `iaf index` + SqliteBacktestIndex — epic #540 phase 2
2 parents 5682157 + c4d30d4 commit 84ccf1f

8 files changed

Lines changed: 791 additions & 0 deletions

File tree

docs/design/tiered-backtest-storage.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ Row size: ~1–2 KB. 12,500 rows ≈ 25 MB. Fits comfortably in SQLite for local
8282
> existing `BacktestIndex` Parquet sidecar is now built on top of this
8383
> typed row (`BacktestIndexRow.to_flat_dict()`), making the wire
8484
> shape and the in-memory shape a single source of truth.
85+
>
86+
> **Status (epic #540 phase 2, v8.10):** the SQLite implementation
87+
> ships as
88+
> `investing_algorithm_framework.services.backtest_index.SqliteBacktestIndex`, with `iaf index <bundle-dir>` as the CLI
89+
> entry point. Every scalar field of `BacktestSummaryMetrics` is
90+
> promoted to its own SQL column (`summary_<name>`), so analysts can
91+
> filter without opening any bundle, e.g. `SELECT bundle_path FROM
92+
> backtest_index WHERE summary_sharpe_ratio > 1.0`.
8593
8694
### 3.2 Tier 2 schemas (Parquet, long format)
8795

investing_algorithm_framework/cli/cli.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,52 @@ def migrate_backtests_cmd(
320320

321321

322322
cli.add_command(migrate_backtests_cmd)
323+
324+
325+
@click.command(name="index")
@click.argument(
    "directory",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
)
@click.option(
    "--output", "-o",
    type=click.Path(file_okay=True, dir_okay=False),
    default=None,
    help="Path to the SQLite index file (default: <directory>/index.sqlite).",
)
@click.option(
    "--absolute-paths", is_flag=True, default=False,
    help="Store absolute bundle paths in the index "
         "(default: paths relative to <directory>, so the index stays "
         "portable when the folder is moved).",
)
@click.option(
    "--no-progress", is_flag=True, default=False,
    help="Suppress the progress bar.",
)
def index_cmd(directory, output, absolute_paths, no_progress):
    """Build a SQLite Tier-1 index over a folder of ``.iafbt`` bundles.

    The resulting ``index.sqlite`` file holds one row per bundle with
    identity / provenance / config columns and every scalar
    ``BacktestSummaryMetrics`` field promoted to its own column, so
    analysts can run ad-hoc SQL queries (e.g.
    ``SELECT bundle_path FROM backtest_index
    WHERE summary_sharpe_ratio > 1.0``) without opening any bundle.

    Each bundle is opened with ``summary_only=True`` so no Parquet
    metric blobs are decoded \u2014 indexing 12,500 bundles is bounded by
    msgpack header parsing, not metric reconstruction.
    """
    # NOTE: the docstring above is what Click prints for
    # ``iaf index --help``; edit it as user-facing text.

    # Imported inside the command body rather than at module import time.
    from .index_command import build_index

    # The CLI exposes opt-out flags; ``build_index`` takes the positive
    # form of each, so invert them once here.
    store_relative = not absolute_paths
    with_progress = not no_progress

    index_file = build_index(
        directory=directory,
        output=output,
        relative_paths=store_relative,
        show_progress=with_progress,
    )
    click.echo(f"Wrote SQLite index to {index_file}")


cli.add_command(index_cmd)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""``iaf index`` CLI \u2014 build a SQLite Tier-1 index over a folder of
2+
``.iafbt`` bundles (epic #540 phase 2).
3+
4+
Walks the directory, opens each bundle with ``summary_only=True`` (no
5+
Parquet metric-blob decode), derives a :class:`BacktestIndexRow` via
6+
:meth:`Backtest.index_row`, and upserts into a
7+
:class:`SqliteBacktestIndex`.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import logging
13+
from pathlib import Path
14+
from typing import Iterable, List, Optional
15+
16+
from investing_algorithm_framework.domain import (
17+
Backtest,
18+
BUNDLE_EXT,
19+
)
20+
from investing_algorithm_framework.services.backtest_index import (
21+
SqliteBacktestIndex,
22+
)
23+
24+
logger = logging.getLogger(__name__)
25+
26+
27+
DEFAULT_INDEX_NAME = "index.sqlite"
28+
29+
30+
def _iter_bundle_paths(directory: Path) -> Iterable[Path]:
    """Return the bundle paths found anywhere below *directory*, sorted.

    The search is recursive and matches every path whose name ends in
    ``BUNDLE_EXT``. The matches are collected into a list and sorted
    before being returned, so callers get them in a stable order.
    """
    pattern = f"*{BUNDLE_EXT}"
    matches = list(directory.rglob(pattern))
    matches.sort()
    return matches
33+
34+
35+
def build_index(
    directory: str,
    output: Optional[str] = None,
    relative_paths: bool = True,
    show_progress: bool = False,
) -> str:
    """Build (or refresh) a SQLite Tier-1 index over *directory*.

    Every bundle found under *directory* is opened with
    ``summary_only=True``, converted to an index row and upserted into
    the SQLite file. The scan is best-effort: a bundle that fails to
    open or index is logged as a warning and skipped, it does not abort
    the run.

    Args:
        directory: Folder to scan for ``.iafbt`` bundles.
        output: Path to the SQLite file. Defaults to
            ``<directory>/index.sqlite``. Missing parent folders are
            created.
        relative_paths: if True, store ``bundle_path`` relative to
            *directory* so the index file stays portable when the
            folder is moved/renamed.
        show_progress: emit a tqdm progress bar (silently skipped when
            tqdm cannot be imported).

    Returns:
        Absolute path of the SQLite file that was written.

    Raises:
        NotADirectoryError: if *directory* does not exist or is not a
            directory.
    """
    src = Path(directory).resolve()
    if not src.is_dir():
        raise NotADirectoryError(f"Not a directory: {src}")

    out = Path(output).resolve() if output else src / DEFAULT_INDEX_NAME
    # NOTE(review): presumably ``SqliteBacktestIndex.create`` opens the
    # file through sqlite3, which does not create missing parent
    # folders - make sure the target folder exists first.
    out.parent.mkdir(parents=True, exist_ok=True)
    paths: List[Path] = list(_iter_bundle_paths(src))

    n_ok = 0
    n_err = 0
    pbar = None
    index = SqliteBacktestIndex.create(out)
    try:
        # The bar is created only after the index opened successfully and
        # inside the ``try``, so it is always closed again below.
        if show_progress:
            try:
                from tqdm import tqdm
            except ImportError:  # pragma: no cover - tqdm is a dep
                pbar = None
            else:
                pbar = tqdm(total=len(paths), desc="Indexing bundles")

        for path in paths:
            try:
                bt = Backtest.open(str(path), summary_only=True)
                bundle_path = (
                    str(path.relative_to(src)) if relative_paths
                    else str(path)
                )
                row = bt.index_row(bundle_path=bundle_path)
                index.upsert(row)
                n_ok += 1
            except Exception as exc:  # noqa: BLE001 - best-effort scan
                logger.warning("failed to index %s: %s", path, exc)
                n_err += 1
            finally:
                # Advance once per bundle, whether it succeeded or not.
                if pbar is not None:
                    pbar.update(1)
    finally:
        # Nested so that a failure while closing the bar can never leave
        # the SQLite handle open.
        try:
            if pbar is not None:
                pbar.close()
        finally:
            index.close()

    logger.info(
        "Indexed %d bundle(s) into %s (%d failed)", n_ok, out, n_err,
    )
    return str(out)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .sqlite_index import SqliteBacktestIndex, SCHEMA_VERSION
2+
3+
__all__ = ["SqliteBacktestIndex", "SCHEMA_VERSION"]

0 commit comments

Comments
 (0)