Skip to content

Commit da1d0ac

Browse files
thodson-usgs and claude committed
fix(waterdata): raise RequestTooLarge for an unchunkable over-budget request
ChunkPlan's "no chunkable axes" branch returned immediately without sizing the request, deliberately leaving an over-budget URL for the server to reject. So a single large CQL-text `filter` with one big `IN (...)` clause (no top-level `OR`, hence no chunk axis) was shipped verbatim and failed with an opaque HTTP 414 — and not even RequestTooLarge. (The equivalent monitoring_location_id=[...] chunks fine.)

Size-check the no-axes path: if the single request fits, pass through as before; if it's over budget there's nothing to split, so raise RequestTooLarge with actionable guidance instead of shipping it.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ee653e5 commit da1d0ac

2 files changed

Lines changed: 28 additions & 10 deletions

File tree

dataretrieval/waterdata/chunking.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -871,13 +871,22 @@ def __init__(
871871
self.canonical_url: str | None = None
872872

873873
axes = _extract_axes(args)
874-
# No chunkable axes → skip ``build_request`` entirely; the
875-
# common Water Data call shape shouldn't pay for an unused
876-
# request prep on the passthrough hot path. The fetcher
877-
# will run with the user's args verbatim; if that produces
878-
# an over-budget URL, the server (or httpx itself) rejects.
879874
if not axes:
880-
return
875+
# No chunkable axis: nothing to split. If the single request fits,
876+
# run it verbatim (the common passthrough). If it's over budget we
877+
# can't chunk it (e.g. a single large CQL ``IN`` clause with no
878+
# top-level ``OR``, or one oversized value), so raise an actionable
879+
# error instead of shipping it for the server to reject with an
880+
# opaque HTTP 414. ``_safe_request_bytes`` treats an un-constructable
881+
# URL (httpx.InvalidURL, > 64 KB) as over budget.
882+
if _safe_request_bytes(build_request, args, url_limit) <= url_limit:
883+
return
884+
raise RequestTooLarge(
885+
f"Request exceeds {url_limit} bytes (URL + body) and has no "
886+
f"chunkable multi-value argument to split (e.g. a single large "
887+
f"CQL `IN` clause, or one oversized value). Narrow the query, "
888+
f"simplify the filter, or split the call manually."
889+
)
881890

882891
# Constructing the initial request can itself trip
883892
# ``httpx.InvalidURL`` (URL > 64 KB) — that's the canonical

tests/waterdata_chunking_test.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -161,15 +161,24 @@ def test_extract_axes_skips_singletons_and_never_chunk_params():
161161

162162

163163
def test_chunk_plan_returns_passthrough_when_no_chunkable_axes():
164-
"""Scalar args with nothing to chunk → passthrough, even at a
165-
URL limit the request technically exceeds (the server may 414,
166-
but ``ChunkPlan`` has nothing to split)."""
164+
"""Scalar args with nothing to chunk and a request within the limit →
165+
passthrough (no axes)."""
167166
args = {"monitoring_location_id": "scalar-only"}
168-
plan = ChunkPlan(args, _fake_build, url_limit=10)
167+
plan = ChunkPlan(args, _fake_build, url_limit=1000)
169168
assert plan.axes == []
170169
assert plan.total == 1
171170

172171

172+
def test_chunk_plan_raises_when_unchunkable_request_exceeds_limit():
173+
"""A request with nothing to chunk that still exceeds the byte limit (e.g.
174+
a single large CQL ``IN`` clause with no top-level ``OR``) raises
175+
RequestTooLarge instead of being shipped for the server to reject with an
176+
opaque HTTP 414."""
177+
args = {"monitoring_location_id": "scalar-only"}
178+
with pytest.raises(RequestTooLarge):
179+
ChunkPlan(args, _fake_build, url_limit=10)
180+
181+
173182
def test_chunk_plan_greedy_halving_targets_largest_axis_chunk():
174183
"""The biggest chunk across all axes halves first — when one list
175184
axis dominates URL bytes, only it gets split until it stops being

0 commit comments

Comments
 (0)