Skip to content

Commit d183bb5

Browse files
thodson-usgsclaude
andcommitted
Fix _validate_data_source: validate on every call, not just cache miss
The validation check `data_source not in _AVAILABLE_DATA_SOURCES` was nested inside `if _AVAILABLE_DATA_SOURCES is None:`, so once any caller warmed the cache, subsequent invalid `data_source`/`feature_source` values were silently accepted. Move the check outside the cache-load branch so all calls are validated. Also guard the `_validate_data_source(feature_source)` call in `get_features` with `if feature_source:` — the previous code unconditionally validated `None` when the user provided only `comid`. The cache bug masked this; with the check now active it would otherwise raise "Invalid data source 'None'". Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 51ac674 commit d183bb5

2 files changed

Lines changed: 42 additions & 7 deletions

File tree

dataretrieval/nldi.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ def get_features(
259259
if data_source:
260260
_validate_data_source(data_source)
261261
# validate feature source
262-
_validate_data_source(feature_source)
262+
if feature_source:
263+
_validate_data_source(feature_source)
263264
# validate the navigation mode
264265
if navigation_mode:
265266
_validate_navigation_mode(navigation_mode)
@@ -475,12 +476,13 @@ def _validate_data_source(data_source: str):
475476
url, {}, "Error getting available data sources"
476477
)
477478
_AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources]
478-
if data_source not in _AVAILABLE_DATA_SOURCES:
479-
err_msg = (
480-
f"Invalid data source '{data_source}'."
481-
f" Available data sources are: {_AVAILABLE_DATA_SOURCES}"
482-
)
483-
raise ValueError(err_msg)
479+
480+
if data_source not in _AVAILABLE_DATA_SOURCES:
481+
err_msg = (
482+
f"Invalid data source '{data_source}'."
483+
f" Available data sources are: {_AVAILABLE_DATA_SOURCES}"
484+
)
485+
raise ValueError(err_msg)
484486

485487

486488
def _validate_navigation_mode(navigation_mode: str):

tests/nldi_test.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import pytest
12
from geopandas import GeoDataFrame
23

4+
import dataretrieval.nldi as nldi
35
from dataretrieval.nldi import (
46
NLDI_API_BASE_URL,
57
get_basin,
@@ -9,6 +11,14 @@
911
)
1012

1113

14+
@pytest.fixture(autouse=True)
15+
def _reset_data_source_cache():
16+
"""Reset the module-level cache between tests."""
17+
nldi._AVAILABLE_DATA_SOURCES = None
18+
yield
19+
nldi._AVAILABLE_DATA_SOURCES = None
20+
21+
1222
def mock_request_data_sources(requests_mock):
1323
request_url = f"{NLDI_API_BASE_URL}/"
1424
available_data_sources = [
@@ -280,3 +290,26 @@ def test_search_for_features_by_lat_long(requests_mock):
280290
assert search_results["features"][0]["type"] == "Feature"
281291
assert search_results["features"][0]["geometry"]["type"] == "LineString"
282292
assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27
293+
294+
295+
def test_validate_data_source_rejects_invalid_after_cache_populated(requests_mock):
296+
"""Once the cache is warm, invalid data sources must still raise ValueError.
297+
298+
Regression: previously the validation check was nested inside the
299+
cache-population branch, so all calls after the first silently passed.
300+
"""
301+
mock_request_data_sources(requests_mock)
302+
303+
# First call: populates the cache with a valid source.
304+
nldi._validate_data_source("WQP")
305+
306+
# Second call with an invalid source must raise.
307+
with pytest.raises(ValueError, match="Invalid data source 'not_a_real_source'"):
308+
nldi._validate_data_source("not_a_real_source")
309+
310+
311+
def test_validate_data_source_rejects_invalid_on_first_call(requests_mock):
312+
"""Cold-cache invalid sources must also raise."""
313+
mock_request_data_sources(requests_mock)
314+
with pytest.raises(ValueError, match="Invalid data source"):
315+
nldi._validate_data_source("not_a_real_source")

0 commit comments

Comments
 (0)