Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/design/tiered-backtest-storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ Row size: ~1–2 KB. 12,500 rows ≈ 25 MB. Fits comfortably in SQLite for local
> existing `BacktestIndex` Parquet sidecar is now built on top of this
> typed row (`BacktestIndexRow.to_flat_dict()`), making the wire
> shape and the in-memory shape a single source of truth.
>
> **Status (epic #540 phase 2, v8.10):** the SQLite implementation ships as
> `investing_algorithm_framework.services.backtest_index.SqliteBacktestIndex`,
> with `iaf index <bundle-dir>` as the CLI
> entry point. Every scalar field of `BacktestSummaryMetrics` is
> promoted to its own SQL column (`summary_<name>`), so analysts can
> filter without opening any bundle, e.g. `SELECT bundle_path FROM
> backtest_index WHERE summary_sharpe_ratio > 1.0`.

### 3.2 Tier 2 schemas (Parquet, long format)

Expand Down
49 changes: 49 additions & 0 deletions investing_algorithm_framework/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,52 @@ def migrate_backtests_cmd(


cli.add_command(migrate_backtests_cmd)


@click.command(name="index")
@click.argument(
    "directory",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
)
@click.option(
    "--output",
    "-o",
    type=click.Path(file_okay=True, dir_okay=False),
    default=None,
    help="Path to the SQLite index file (default: <directory>/index.sqlite).",
)
@click.option(
    "--absolute-paths",
    is_flag=True,
    default=False,
    help=(
        "Store absolute bundle paths in the index (default: paths "
        "relative to <directory>, so the index stays portable when "
        "the folder is moved)."
    ),
)
@click.option(
    "--no-progress",
    is_flag=True,
    default=False,
    help="Suppress the progress bar.",
)
def index_cmd(directory, output, absolute_paths, no_progress):
    """Build a SQLite Tier-1 index over a folder of ``.iafbt`` bundles.

    The resulting ``index.sqlite`` file holds one row per bundle with
    identity / provenance / config columns and every scalar
    ``BacktestSummaryMetrics`` field promoted to its own column, so
    analysts can run ad-hoc SQL queries (e.g.
    ``SELECT bundle_path FROM backtest_index
    WHERE summary_sharpe_ratio > 1.0``) without opening any bundle.

    Each bundle is opened with ``summary_only=True`` so no Parquet
    metric blobs are decoded \u2014 indexing 12,500 bundles is bounded by
    msgpack header parsing, not metric reconstruction.
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text,
    # so it is user-facing output rather than internal documentation.

    # Imported inside the command body, so the indexing module is only
    # loaded when ``index`` is actually invoked.
    from .index_command import build_index

    # The CLI exposes negative flags; build_index takes the positive form.
    store_relative = not absolute_paths
    with_progress = not no_progress

    index_path = build_index(
        directory=directory,
        output=output,
        relative_paths=store_relative,
        show_progress=with_progress,
    )
    click.echo(f"Wrote SQLite index to {index_path}")


cli.add_command(index_cmd)
98 changes: 98 additions & 0 deletions investing_algorithm_framework/cli/index_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""``iaf index`` CLI \u2014 build a SQLite Tier-1 index over a folder of
``.iafbt`` bundles (epic #540 phase 2).

Walks the directory, opens each bundle with ``summary_only=True`` (no
Parquet metric-blob decode), derives a :class:`BacktestIndexRow` via
:meth:`Backtest.index_row`, and upserts into a
:class:`SqliteBacktestIndex`.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Iterable, List, Optional

from investing_algorithm_framework.domain import (
Backtest,
BUNDLE_EXT,
)
from investing_algorithm_framework.services.backtest_index import (
SqliteBacktestIndex,
)

logger = logging.getLogger(__name__)


DEFAULT_INDEX_NAME = "index.sqlite"


def _iter_bundle_paths(directory: Path) -> Iterable[Path]:
    """Return every path below *directory* ending in ``BUNDLE_EXT``.

    The search is recursive (``Path.rglob``) and the matches are
    returned as a list in sorted order, so repeated scans of the same
    tree visit bundles in the same sequence.
    """
    pattern = f"*{BUNDLE_EXT}"
    matches = list(directory.rglob(pattern))
    matches.sort()
    return matches


def build_index(
    directory: str,
    output: Optional[str] = None,
    relative_paths: bool = True,
    show_progress: bool = False,
) -> str:
    """Build (or refresh) a SQLite Tier-1 index over *directory*.

    Every bundle path returned by ``_iter_bundle_paths`` is opened with
    ``summary_only=True``, turned into an index row through
    ``Backtest.index_row`` and upserted into the SQLite index. The scan
    is best-effort: a bundle that fails to open or index is logged as a
    warning and counted as failed, and the scan moves on to the next one.

    Args:
        directory: Folder to scan for ``.iafbt`` bundles.
        output: Path to the SQLite file. Defaults to
            ``<directory>/index.sqlite``.
        relative_paths: if True, store ``bundle_path`` relative to
            *directory* so the index file stays portable when the
            folder is moved/renamed; otherwise store the full path.
        show_progress: emit a tqdm progress bar (silently skipped when
            tqdm cannot be imported).

    Returns:
        Absolute path of the SQLite file that was written.

    Raises:
        NotADirectoryError: if *directory* does not resolve to an
            existing directory.
    """
    src = Path(directory).resolve()
    if not src.is_dir():
        raise NotADirectoryError(f"Not a directory: {src}")

    out = Path(output).resolve() if output else src / DEFAULT_INDEX_NAME
    paths: List[Path] = list(_iter_bundle_paths(src))

    pbar = None
    index = None
    n_ok = 0
    n_err = 0
    # Both resources are acquired inside the guarded region so that a
    # failure while opening the index still closes the progress bar.
    try:
        if show_progress:
            try:
                from tqdm import tqdm
                pbar = tqdm(total=len(paths), desc="Indexing bundles")
            except ImportError:  # pragma: no cover - tqdm is a dep
                pbar = None

        index = SqliteBacktestIndex.create(out)
        for path in paths:
            try:
                bt = Backtest.open(str(path), summary_only=True)
                bundle_path = (
                    str(path.relative_to(src)) if relative_paths
                    else str(path)
                )
                row = bt.index_row(bundle_path=bundle_path)
                index.upsert(row)
                n_ok += 1
            except Exception as exc:  # noqa: BLE001 - best-effort scan
                logger.warning("failed to index %s: %s", path, exc)
                n_err += 1
            finally:
                # Advance the bar for failed bundles too, so it always
                # reaches its total.
                if pbar is not None:
                    pbar.update(1)
    finally:
        # Nested so an error while closing the bar cannot skip closing
        # the index.
        try:
            if pbar is not None:
                pbar.close()
        finally:
            if index is not None:
                index.close()

    logger.info(
        "Indexed %d bundle(s) into %s (%d failed)", n_ok, out, n_err,
    )
    return str(out)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Package entry point: re-export ``SqliteBacktestIndex`` and
# ``SCHEMA_VERSION`` from the sibling ``sqlite_index`` module so callers
# can import them from the package itself.
from .sqlite_index import SqliteBacktestIndex, SCHEMA_VERSION

# Names exposed by a star-import of this package.
__all__ = ["SqliteBacktestIndex", "SCHEMA_VERSION"]
Loading
Loading