Skip to content

Commit b728e91

Browse files
thodson-usgs and claude committed
fix(waterdata): align cql-json filter tests with the unchunkable raise
ChunkPlan now raises RequestTooLarge for an over-budget request with no chunkable axis. That broke test_cql_json_filter_is_not_chunked, whose 300-clause (~30 KB) cql-json filter is no longer passed through. Use a within-budget filter so it still asserts cql-json ships un-chunked, and add test_cql_json_filter_over_budget_raises for the new behavior.

Cleanups: reuse the existing _filter_chunking_clauses() helper instead of re-hand-rolling the clause/join, pin both raises with match=, drop a provably-dead transport-mock block (the raise fires during ChunkPlan construction, before any send), and trim a redundant comment.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent d0e23d1 commit b728e91

3 files changed

Lines changed: 34 additions & 11 deletions

File tree

dataretrieval/waterdata/chunking.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -872,13 +872,10 @@ def __init__(
872872

873873
axes = _extract_axes(args)
874874
if not axes:
875-
# No chunkable axis: nothing to split. If the single request fits,
876-
# run it verbatim (the common passthrough). If it's over budget we
877-
# can't chunk it (e.g. a single large CQL ``IN`` clause with no
878-
# top-level ``OR``, or one oversized value), so raise an actionable
879-
# error instead of shipping it for the server to reject with an
880-
# opaque HTTP 414. ``_safe_request_bytes`` treats an un-constructable
881-
# URL (httpx.InvalidURL, > 64 KB) as over budget.
875+
# No chunkable axis: if the single request fits, run it verbatim
876+
# (the common passthrough); otherwise it can't be split, so raise an
877+
# actionable error rather than ship a doomed request for the server
878+
# to reject with an opaque HTTP 414.
882879
if _safe_request_bytes(build_request, args, url_limit) <= url_limit:
883880
return
884881
raise RequestTooLarge(

tests/waterdata_chunking_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ def test_chunk_plan_raises_when_unchunkable_request_exceeds_limit():
175175
RequestTooLarge instead of being shipped for the server to reject with an
176176
opaque HTTP 414."""
177177
args = {"monitoring_location_id": "scalar-only"}
178-
with pytest.raises(RequestTooLarge):
178+
with pytest.raises(RequestTooLarge, match="no chunkable"):
179179
ChunkPlan(args, _fake_build, url_limit=10)
180180

181181

tests/waterdata_filters_test.py

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import pytest
88

99
from dataretrieval.waterdata import get_continuous
10+
from dataretrieval.waterdata.chunking import RequestTooLarge
1011
from dataretrieval.waterdata.filters import (
1112
_check_numeric_filter_pitfall,
1213
_split_top_level_or,
@@ -270,9 +271,14 @@ async def fake_walk_pages(*_args, **_kwargs):
270271

271272

272273
def test_cql_json_filter_is_not_chunked():
273-
"""Chunking applies only to cql-text; cql-json is passed through unchanged."""
274-
clause = "(time >= '2023-01-01T00:00:00Z' AND time <= '2023-01-01T00:30:00Z')"
275-
expr = " OR ".join([clause] * 300)
274+
"""A within-budget cql-json filter is passed through unchanged.
275+
276+
Chunking only applies to cql-text (split on top-level ``OR``); cql-json is
277+
opaque to the chunker. The over-budget case is covered by
278+
``test_cql_json_filter_over_budget_raises``.
279+
"""
280+
# 50 clauses (~5 KB) stay under the 8 KB chunking budget → passthrough.
281+
expr = _filter_chunking_clauses(50)
276282
sent_filters = []
277283

278284
def fake_construct_api_requests(**kwargs):
@@ -304,6 +310,26 @@ def fake_construct_api_requests(**kwargs):
304310
assert sent_filters == [expr]
305311

306312

313+
def test_cql_json_filter_over_budget_raises():
314+
"""A cql-json filter the chunker can't split (no top-level ``OR`` axis is
315+
extracted) that still exceeds the URL byte budget raises ``RequestTooLarge``
316+
rather than shipping a doomed request for the server to reject with an
317+
opaque HTTP 414.
318+
319+
The raise happens during ``ChunkPlan`` construction, before any request is
320+
sent, so no transport mocking is needed.
321+
"""
322+
# 300 clauses (~30 KB) blow past the 8 KB chunking budget.
323+
expr = _filter_chunking_clauses(300)
324+
with pytest.raises(RequestTooLarge, match="no chunkable"):
325+
get_continuous(
326+
monitoring_location_id="USGS-07374525",
327+
parameter_code="72255",
328+
filter=expr,
329+
filter_lang="cql-json",
330+
)
331+
332+
307333
@pytest.mark.parametrize(
308334
"expr",
309335
[

0 commit comments

Comments
 (0)