Skip to content

Commit 11dd5a2

Browse files
thodson-usgsclaude
andauthored
Fix nldi: validate data source on every call, not just cache-miss (#246)
The validation check `data_source not in _AVAILABLE_DATA_SOURCES` was nested inside `if _AVAILABLE_DATA_SOURCES is None:`, so once any caller warmed the cache, subsequent invalid `data_source`/`feature_source` values were silently accepted. Move the check outside the cache-load branch so all calls are validated. Also guard the `_validate_data_source(feature_source)` call in `get_features` with `if feature_source:` — the previous code unconditionally validated `None` when the user provided only `comid`. The cache bug masked this; with the check now active it would otherwise raise "Invalid data source 'None'". Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 01c7fc7 commit 11dd5a2

2 files changed

Lines changed: 30 additions & 8 deletions

File tree

dataretrieval/nldi.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,9 @@ def get_features(
251251
"navigation_mode is required if comid or data_source is provided"
252252
)
253253
_validate_feature_source_comid(feature_source, feature_id, comid)
254-
if data_source:
254+
if data_source is not None:
255255
_validate_data_source(data_source)
256-
if feature_source:
256+
if feature_source is not None:
257257
_validate_data_source(feature_source)
258258
if navigation_mode:
259259
navigation_mode = _validate_navigation_mode(navigation_mode)
@@ -454,12 +454,13 @@ def _validate_data_source(data_source: str):
454454
url, {}, "Error getting available data sources"
455455
)
456456
_AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources]
457-
if data_source not in _AVAILABLE_DATA_SOURCES:
458-
err_msg = (
459-
f"Invalid data source '{data_source}'."
460-
f" Available data sources are: {_AVAILABLE_DATA_SOURCES}"
461-
)
462-
raise ValueError(err_msg)
457+
458+
if data_source not in _AVAILABLE_DATA_SOURCES:
459+
err_msg = (
460+
f"Invalid data source '{data_source}'."
461+
f" Available data sources are: {_AVAILABLE_DATA_SOURCES}"
462+
)
463+
raise ValueError(err_msg)
463464

464465

465466
def _features_err_msg(feature_source, feature_id, comid, data_source) -> str:

tests/nldi_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
from geopandas import GeoDataFrame
33

4+
import dataretrieval.nldi as nldi
45
from dataretrieval.nldi import (
56
NLDI_API_BASE_URL,
67
_validate_navigation_mode,
@@ -11,6 +12,12 @@
1112
)
1213

1314

15+
@pytest.fixture(autouse=True)
16+
def _reset_data_source_cache(monkeypatch):
17+
"""Reset the module-level cache between tests."""
18+
monkeypatch.setattr(nldi, "_AVAILABLE_DATA_SOURCES", None)
19+
20+
1421
def mock_request_data_sources(requests_mock):
1522
request_url = f"{NLDI_API_BASE_URL}/"
1623
available_data_sources = [
@@ -284,6 +291,20 @@ def test_search_for_features_by_lat_long(requests_mock):
284291
assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27
285292

286293

294+
def test_validate_data_source_rejects_invalid_after_cache_populated(requests_mock):
295+
"""Once the cache is warm, invalid data sources must still raise ValueError.
296+
297+
Regression: previously the validation check was nested inside the
298+
cache-population branch, so all calls after the first silently passed.
299+
"""
300+
mock_request_data_sources(requests_mock)
301+
302+
nldi._validate_data_source("WQP")
303+
304+
with pytest.raises(ValueError, match="Invalid data source 'not_a_real_source'"):
305+
nldi._validate_data_source("not_a_real_source")
306+
307+
287308
# --- regression tests for nldi cleanup batch ---
288309

289310

0 commit comments

Comments
 (0)