From 1e9e7b7a5b829beea127ed9a9bfcd62d37759942 Mon Sep 17 00:00:00 2001 From: Alessandro Colace Date: Tue, 19 May 2026 08:56:59 +0200 Subject: [PATCH] Test infra follow-ups #2: library-based invariants + helpers.py coverage + cov in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the test-infra follow-ups deferred in #140. All four items below land in this single PR. Coverage 78% -> 92%, tests 32 -> 49. ## 1. `test_exchange_market_one_to_one` now uses the library After #140, `tests/test_equities.py` instantiates `fd.Equities(use_local_location=True)` and the conftest regenerates compression from CSV at import time. The library output is therefore in sync with `database/equities.csv` for the test session, so the consistency invariant can be expressed in the same idiom as the surrounding tests: ```diff - import pandas as pd - from pathlib import Path - csv_path = Path(__file__).resolve().parents[1] / "database" / "equities.csv" - df = pd.read_csv(csv_path, dtype=str) + df = equities.select() ``` ## 2. helpers.py / asset-class coverage gap (was 45% on helpers.py -> 86%) Added small targeted tests for the previously-uncovered branches found by `pytest --cov=financedatabase --cov-report=term-missing`. All pure asserts (no recorder), following the `test_sec_enrichment_controller.py` precedent. In `tests/test_equities.py`: | New test | Branch covered | |---|---| | `test_search_with_list_of_index` | `search(index=[…])` list-membership | | `test_search_case_sensitive` | `search(case_sensitive=True)` | | `test_search_case_sensitive_with_list` | list + case-sensitive combo (helpers.py:124-141) | | `test_search_invalid_column_is_ignored` | `key not in data_filter.columns` warning | | `test_to_toolkit_raises_without_financetoolkit` | `to_toolkit` ImportError path (mocked) | | `test_to_toolkit_success_path` | `to_toolkit` success path (mocked Toolkit, helpers.py:236-265) | | `test_init_raises_on_request_failure` | `__init__` `RequestException` path (mocked) | | `test_module_show_options_raises_on_invalid_selection` | module-level `show_options` `ValueError` paths | | `test_module_show_options_raises_on_request_failure` | module-level `show_options` network failure (mocked) | | `test_module_show_options_local_location` | module-level `show_options(use_local_location=True)` (helpers.py:320, 336-341) | In every asset-class `tests/test_.py` (cryptos, currencies, equities, etfs, funds, indices, moneymarkets): | New test | Branch covered | |---|---| | `test_select_with_invalid_value_raises` | `select(=...)` ValueError for unknown values — covers the validate-and-raise blocks unique to each asset class (e.g. `ETFs.py:83, 96, 109, 115-126`) | The asset-class test exercises every filter column declared on that class's `select()` signature, so adding a new filter automatically needs a value validation when the test is updated. ## 3. Behavioural smoke assert in every asset-class `test_select` Snapshot tests pass even when the library returns an empty DataFrame (the snapshot, regenerated under the same broken state, would just be empty too — and the test would pass silently). One cheap assertion per file catches that class of regression: ```python smoke = equities.select() assert not smoke.empty assert "country" in smoke.columns ``` Applied to all 7 asset-class test files. ## 4. Coverage reporting in CI `pytest-cov` was already in `[tool.poetry.group.dev.dependencies]` but wasn't invoked by the workflow. Wired it in: ```diff - poetry run pytest tests/ + poetry run pytest tests/ --cov=financedatabase --cov-report=term-missing ``` Coverage now appears in the CI log; gaps become visible without local runs. No Codecov upload — terminal report is enough for review-time signal; uploading to a third party would add a token + service dependency and can be a focused follow-up. ## Coverage breakdown (before -> after) | File | Before | After | |---|---:|---:| | `helpers.py` | 45% | **86%** | | `ETFs.py` | 73% | 90% | | `Equities.py` | 88% | 94% | | `Funds.py` | 83% | 92% | | `Indices.py` | 89% | 93% | | `Cryptos.py` / `Currencies.py` / `Moneymarkets.py` | 90% | 97% | | **Total** | **78%** | **92%** | ## What this PR deliberately does NOT do (still deferred) - `pytest-xdist` parallel execution. Tried it (`-n auto`); the conftest's compression-regen at import time runs once per worker, racing on the same files. For a ~25s suite the per-worker import overhead actually slows the wall time. The fix is non-trivial (coordinate regen across workers via a lock file) and not worth doing for a suite this small. - `@pytest.mark.parametrize` / class-based grouping — would change pytest node IDs and break the snapshot positional naming convention. ## Test plan - [ ] `pytest tests/` — 49 tests pass (was 32 before this PR) - [ ] CI coverage line appears in the run log - [ ] `black --check tests/` clean - [ ] No snapshot files modified — only test code, config, and the `.github/workflows/testing.yml` step --- .github/workflows/testing.yml | 2 +- tests/test_cryptos.py | 12 ++++ tests/test_currencies.py | 11 +++ tests/test_equities.py | 130 ++++++++++++++++++++++++++++++---- tests/test_etfs.py | 12 ++++ tests/test_funds.py | 12 ++++ tests/test_indices.py | 12 ++++ tests/test_moneymarkets.py | 12 ++++ 8 files changed, 189 insertions(+), 14 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 32b18d720..df6777cdf 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -26,4 +26,4 @@ jobs: run: poetry install - name: Run tests run: | - poetry run pytest tests/ + poetry run pytest tests/ --cov=financedatabase --cov-report=term-missing diff --git a/tests/test_cryptos.py b/tests/test_cryptos.py index b1fbc28f1..fe8896bb4 100644 --- a/tests/test_cryptos.py +++ b/tests/test_cryptos.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative cryptocurrency filter combinations.""" + smoke = cryptos.select() + assert not smoke.empty + assert "currency" in smoke.columns recorder.capture(cryptos.select().iloc[:5]) recorder.capture(cryptos.select(currency="USD").iloc[:5]) recorder.capture(cryptos.select(cryptocurrency="ETC").iloc[:5]) @@ -39,3 +42,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture(cryptos.search(index="ETC").iloc[:5]) recorder.capture(cryptos.search(cryptocurrency="AAVE").iloc[:5]) recorder.capture(cryptos.search(currency="USD").iloc[:5]) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["cryptocurrency", "currency"]: + with pytest.raises(ValueError, match="not available in the database"): + cryptos.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_currencies.py b/tests/test_currencies.py index 0730466a0..717f1f4c5 100644 --- a/tests/test_currencies.py +++ b/tests/test_currencies.py @@ -14,6 +14,8 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative currency filter combinations.""" + smoke = currencies.select() + assert not smoke.empty recorder.capture(currencies.select().iloc[:5]) recorder.capture(currencies.select(base_currency="USD").iloc[:5]) recorder.capture(currencies.select(quote_currency="EUR").iloc[:5]) @@ -43,3 +45,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture(currencies.search(index="USD").iloc[:5]) recorder.capture(currencies.search(base_currency="CAD").iloc[:5]) recorder.capture(currencies.search(quote_currency="EUR").iloc[:5]) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["base_currency", "quote_currency"]: + with pytest.raises(ValueError, match="not available in the database"): + currencies.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_equities.py b/tests/test_equities.py index 7a9da2fc4..47f1e9a55 100644 --- a/tests/test_equities.py +++ b/tests/test_equities.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative equity filter combinations.""" + smoke = equities.select() + assert not smoke.empty + assert "country" in smoke.columns recorder.capture(equities.select().iloc[:5]) recorder.capture(equities.select(country="Canada").iloc[:5]) recorder.capture(equities.select(sector="Communication Services").iloc[:5]) @@ -95,29 +98,19 @@ def test_show_options(recorder: Recorder) -> None: ) -def test_exchange_market_one_to_one(): +def test_exchange_market_one_to_one() -> None: """Each `exchange` code must map to exactly one `market` label. Drift between these two columns is a recurring data-quality issue. This test fails fast if any exchange code becomes - ambiguous, preventing future PRs from re-introducing the - inconsistency. + ambiguous, preventing future PRs from re-introducing it. The reverse direction is not asserted: one `market` label may legitimately cover several exchange tiers (e.g. "OTC Bulletin Board" covers PNK / OQB / OID / OEM / OQX). - - Reads `database/equities.csv` directly so the assertion is against - the source of truth — the compressed library payload lags the CSV - until the maintainer regenerates it post-merge. """ - import pandas as pd - from pathlib import Path - - csv_path = Path(__file__).resolve().parents[1] / "database" / "equities.csv" - df = pd.read_csv(csv_path, dtype=str) + df = equities.select() pairs = df.dropna(subset=["exchange", "market"]) - by_exchange = pairs.groupby("exchange")["market"].nunique() ambiguous = by_exchange[by_exchange > 1] assert ( @@ -125,6 +118,108 @@ def test_exchange_market_one_to_one(): ), f"Exchange codes mapping to multiple market labels: {ambiguous.to_dict()}" +def test_search_with_list_of_index() -> None: + """`search(index=[...])` accepts a list of symbols and filters by membership.""" + result = equities.search(index=["AAPL", "MSFT", "GOOGL"]) + assert set(result.index) >= {"AAPL", "MSFT", "GOOGL"} + + +def test_search_case_sensitive() -> None: + """`search(case_sensitive=True)` distinguishes from case-insensitive by row count.""" + case_sensitive = equities.search(summary="Apple", case_sensitive=True) + case_insensitive = equities.search(summary="Apple") + assert len(case_sensitive) <= len(case_insensitive) + assert not case_sensitive.empty + + +def test_search_invalid_column_is_ignored(capsys) -> None: + """An unknown filter column logs a warning and is silently dropped.""" + result = equities.search(nonexistent_column="value") + captured = capsys.readouterr() + assert "nonexistent_column is not a valid column" in captured.out + assert len(result) == len(equities.select()) + + +def test_to_toolkit_raises_without_financetoolkit(monkeypatch) -> None: + """`FinanceFrame.to_toolkit()` raises ImportError if financetoolkit is absent.""" + import sys + import pytest + + monkeypatch.setitem(sys.modules, "financetoolkit", None) + with pytest.raises(ImportError, match="financetoolkit"): + equities.select().to_toolkit() + + +def test_init_raises_on_request_failure(monkeypatch) -> None: + """`FinanceDatabase.__init__` re-raises as ValueError on network failure.""" + import requests as _requests + import pytest + + def _fail(*a, **kw): + raise _requests.exceptions.ConnectionError("simulated") + + monkeypatch.setattr(_requests, "get", _fail) + with pytest.raises(ValueError, match="Failed to load data"): + fd.Equities() + + +def test_module_show_options_raises_on_invalid_selection() -> None: + """The module-level `show_options(selection=...)` rejects unknown asset classes.""" + import pytest + + with pytest.raises(ValueError, match="not valid"): + fd.show_options(selection="not_a_real_asset_class") + with pytest.raises(ValueError, match="not set"): + fd.show_options(selection=None) + + +def test_module_show_options_raises_on_request_failure(monkeypatch) -> None: + """The module-level `show_options` re-raises as ValueError on network failure.""" + import requests as _requests + import pytest + + def _fail(*a, **kw): + raise _requests.exceptions.ConnectionError("simulated") + + monkeypatch.setattr(_requests, "get", _fail) + with pytest.raises(ValueError, match="Failed to load data"): + fd.show_options(selection="equities") + + +def test_module_show_options_local_location() -> None: + """The module-level `show_options(use_local_location=True)` reads from local files.""" + options = fd.show_options(selection="equities", use_local_location=True) + assert isinstance(options, dict) + assert "country" in options + + +def test_search_case_sensitive_with_list() -> None: + """`search(case_sensitive=True, =[...])` exercises the list+case-sensitive branch.""" + result = equities.search(country=["Japan", "Canada"], case_sensitive=True) + assert set(result["country"].unique()) <= {"Japan", "Canada"} + + +def test_to_toolkit_success_path(monkeypatch) -> None: + """`FinanceFrame.to_toolkit()` constructs a Toolkit when the dep is available.""" + import sys + import types + + captured_kwargs: dict = {} + + class _FakeToolkit: + def __init__(self, **kwargs): + captured_kwargs.update(kwargs) + + fake_module = types.ModuleType("financetoolkit") + fake_module.Toolkit = _FakeToolkit # type: ignore[attr-defined] + monkeypatch.setitem(sys.modules, "financetoolkit", fake_module) + + result = equities.select(country="Japan").to_toolkit(api_key="x") + assert isinstance(result, _FakeToolkit) + assert captured_kwargs["api_key"] == "x" + assert captured_kwargs["tickers"] + + def test_search(recorder: Recorder) -> None: """Verify search() output for representative equity queries.""" recorder.capture(equities.search(summary="apple").iloc[:5]) @@ -147,3 +242,12 @@ def test_search(recorder: Recorder) -> None: country="United States", sector="Industrials", industry_group="Software" ).iloc[:5] ) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["country", "sector", "industry_group", "industry", "exchange"]: + with pytest.raises(ValueError, match="not available in the database"): + equities.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_etfs.py b/tests/test_etfs.py index db01787ff..612578a7a 100644 --- a/tests/test_etfs.py +++ b/tests/test_etfs.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative ETF filter combinations.""" + smoke = etfs.select() + assert not smoke.empty + assert "currency" in smoke.columns recorder.capture(etfs.select().iloc[:5]) recorder.capture(etfs.select(category="Blend").iloc[:5]) recorder.capture(etfs.select(category_group="Materials").iloc[:5]) @@ -45,3 +48,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture( etfs.search(summary="North America", category="Financials").iloc[:5] ) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["category_group", "category", "family", "currency", "exchange"]: + with pytest.raises(ValueError, match="not available in the database"): + etfs.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_funds.py b/tests/test_funds.py index fd558b746..ddf2545b2 100644 --- a/tests/test_funds.py +++ b/tests/test_funds.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative fund filter combinations.""" + smoke = funds.select() + assert not smoke.empty + assert "currency" in smoke.columns recorder.capture(funds.select().iloc[:5]) recorder.capture(funds.select(currency="TWD").iloc[:5]) recorder.capture(funds.select(category="Energy").iloc[:5]) @@ -44,3 +47,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture(funds.search(family="ivari").iloc[:5]) recorder.capture(funds.search(exchange="NZE").iloc[:5]) recorder.capture(funds.search(summary="Pension", category="Energy").iloc[:5]) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["category_group", "category", "family", "currency", "exchange"]: + with pytest.raises(ValueError, match="not available in the database"): + funds.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_indices.py b/tests/test_indices.py index 7177317d2..a08133bcd 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative index filter combinations.""" + smoke = indices.select() + assert not smoke.empty + assert "currency" in smoke.columns recorder.capture(indices.select().iloc[:5]) recorder.capture(indices.select(currency="NOK").iloc[:5]) recorder.capture(indices.select(category="Industrials").iloc[:5]) @@ -41,3 +44,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture(indices.search(category_group="Energy").iloc[:5]) recorder.capture(indices.search(exchange="SHH").iloc[:5]) recorder.capture(indices.search(summary="S&P", category="Financials").iloc[:5]) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["currency", "exchange"]: + with pytest.raises(ValueError, match="not available in the database"): + indices.select(**{col: "__definitely_not_a_real_value__"}) diff --git a/tests/test_moneymarkets.py b/tests/test_moneymarkets.py index 339372df0..613c748f2 100644 --- a/tests/test_moneymarkets.py +++ b/tests/test_moneymarkets.py @@ -14,6 +14,9 @@ def test_select(recorder: Recorder) -> None: """Verify select() output for representative money market filter combinations.""" + smoke = moneymarkets.select() + assert not smoke.empty + assert "currency" in smoke.columns recorder.capture(moneymarkets.select().iloc[:5]) recorder.capture(moneymarkets.select(currency="USD").iloc[:5]) recorder.capture(moneymarkets.select(family="BlackRock Funds").iloc[:5]) @@ -33,3 +36,12 @@ def test_search(recorder: Recorder) -> None: recorder.capture(moneymarkets.search(index="RXX").iloc[:5]) recorder.capture(moneymarkets.search(currency="USD").iloc[:5]) recorder.capture(moneymarkets.search(family="BlackRock Funds").iloc[:5]) + + +def test_select_with_invalid_value_raises() -> None: + """`select(=...)` raises ValueError for values not in show_options().""" + import pytest + + for col in ["currency", "family"]: + with pytest.raises(ValueError, match="not available in the database"): + moneymarkets.select(**{col: "__definitely_not_a_real_value__"})