Skip to content

Commit e7da0bb

Browse files
thodson-usgs and claude committed
test(waterdata): add stats hash-drop unit tests
Two mocked-response tests for ``get_stats_data``:

- ``test_get_stats_data_drops_hash_ids_by_default`` asserts ``computation_id`` and ``parent_time_series_id`` are removed when ``include_hash_ids=False`` (the new default).
- ``test_get_stats_data_keeps_hash_ids_when_opted_in`` asserts the opt-in path preserves them, matching the legacy behavior.

Both use ``monkeypatch`` to stub ``_handle_stats_nesting`` so the fake response only needs to carry the column shape, not the full nested-percentile body.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5f95a1d commit e7da0bb

1 file changed

Lines changed: 82 additions & 0 deletions

File tree

tests/waterdata_utils_test.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,88 @@ def test_get_stats_data_warning_includes_next_token(caplog, monkeypatch):
256256
assert any("tok2" in m for m in warnings_), warnings_
257257

258258

259+
def test_get_stats_data_drops_hash_ids_by_default(monkeypatch):
    """``get_stats_data`` drops ``computation_id`` and
    ``parent_time_series_id`` from the result by default — the
    ``include_hash_ids=False`` counterpart for the stats path."""
    import datetime

    from dataretrieval.waterdata.utils import get_stats_data

    # Stub the nesting handler so the fake HTTP response only has to carry
    # the column shape, not a full nested-percentile body.
    stubbed_frame = pd.DataFrame(
        {
            "monitoring_location_id": ["USGS-1"],
            "parameter_code": ["00060"],
            "computation_id": ["7d70379f-8452-44cd-b026-24dfa11f8503"],
            "parent_time_series_id": ["9cca880dec4846ec8cbdd05f3e22603e"],
            "value": [1.0],
        }
    )
    monkeypatch.setattr(
        _utils_module,
        "_handle_stats_nesting",
        mock.MagicMock(return_value=stubbed_frame),
    )

    # A single successful page whose JSON body carries no ``next`` token.
    page1 = mock.MagicMock()
    page1.status_code = 200
    page1.json.return_value = {"next": None, "features": []}
    page1.elapsed = datetime.timedelta(milliseconds=1)
    page1.headers = {}
    page1.url = "https://example/stats"
    client = mock.MagicMock(spec=requests.Session)
    client.send.return_value = page1

    # ``include_hash_ids`` is deliberately omitted to exercise the default.
    df, _ = get_stats_data(
        args={"monitoring_location_id": "USGS-1"},
        service="observationNormals",
        expand_percentiles=False,
        client=client,
    )

    # The hash-valued ID columns are gone ...
    assert "computation_id" not in df.columns
    assert "parent_time_series_id" not in df.columns
    # ... while the remaining columns survive.
    assert "monitoring_location_id" in df.columns
    assert "parameter_code" in df.columns
    assert "value" in df.columns
301+
302+
303+
def test_get_stats_data_keeps_hash_ids_when_opted_in(monkeypatch):
    """``include_hash_ids=True`` preserves the legacy stats columns."""
    import datetime

    from dataretrieval.waterdata.utils import get_stats_data

    # Stub the nesting handler so the fake HTTP response only has to carry
    # the column shape, not a full nested-percentile body.
    stubbed_frame = pd.DataFrame(
        {
            "monitoring_location_id": ["USGS-1"],
            "computation_id": ["7d70379f-8452-44cd-b026-24dfa11f8503"],
            "parent_time_series_id": ["9cca880dec4846ec8cbdd05f3e22603e"],
        }
    )
    monkeypatch.setattr(
        _utils_module,
        "_handle_stats_nesting",
        mock.MagicMock(return_value=stubbed_frame),
    )

    # A single successful page whose JSON body carries no ``next`` token.
    page1 = mock.MagicMock()
    page1.status_code = 200
    page1.json.return_value = {"next": None, "features": []}
    page1.elapsed = datetime.timedelta(milliseconds=1)
    page1.headers = {}
    page1.url = "https://example/stats"
    client = mock.MagicMock(spec=requests.Session)
    client.send.return_value = page1

    # Explicit opt-in: the hash-valued ID columns must be kept.
    df, _ = get_stats_data(
        args={"monitoring_location_id": "USGS-1"},
        service="observationNormals",
        expand_percentiles=False,
        client=client,
        include_hash_ids=True,
    )
    assert "computation_id" in df.columns
    assert "parent_time_series_id" in df.columns
339+
340+
259341
def test_handle_stats_nesting_tolerates_missing_drop_columns():
260342
"""If the upstream stats response shape ever changes such that one of
261343
the columns we try to drop ("type", "properties.data") is absent, the

0 commit comments

Comments
 (0)