Skip to content

Commit c71b865

Browse files
thodson-usgsclaude
andcommitted
utils.query: surface unhandled 4xx/5xx and stop mutating the caller's payload
Two related correctness fixes to the shared HTTP wrapper used by every module in the package. * The function only formatted explicit messages for 400, 404, 414, and 500/502/503. Any other status (401, 403, 405, 408, 429, 501, 504, …) fell through and the response was returned as if it had succeeded — callers parsed an HTML error page as RDB or CSV. Add a ``raise_for_status()`` after the explicit branches so every non-success surfaces, while keeping the friendlier messages for the codes we already format. * The body did ``for key, value in payload.items(): payload[key] = to_str(...)``, mutating the caller's dict. List values came back replaced with their stringified joins, breaking any caller that re-used the dict. Build a fresh ``params`` dict in a comprehension and leave the input alone. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 51ac674 commit c71b865

2 files changed

Lines changed: 38 additions & 9 deletions

File tree

dataretrieval/utils.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def query(url, payload, delimiter=",", ssl_check=True):
163163
url: string
164164
URL to query
165165
payload: dict
166-
query parameters passed to ``requests.get``
166+
query parameters passed to ``requests.get``. Not mutated.
167167
delimiter: string
168168
delimiter to use with lists
169169
ssl_check: bool
@@ -175,17 +175,11 @@ def query(url, payload, delimiter=",", ssl_check=True):
175175
string: query response
176176
The response from the API query ``requests.get`` function call.
177177
"""
178+
params = {key: to_str(value, delimiter) for key, value in payload.items()}
178179

179-
for key, value in payload.items():
180-
payload[key] = to_str(value, delimiter)
181-
# for index in range(len(payload)):
182-
# key, value = payload[index]
183-
# payload[index] = (key, to_str(value))
184-
185-
# define the user agent for the query
186180
user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}
187181

188-
response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)
182+
response = requests.get(url, params=params, headers=user_agent, verify=ssl_check)
189183

190184
if response.status_code == 400:
191185
raise ValueError(
@@ -218,6 +212,10 @@ def query(url, payload, delimiter=",", ssl_check=True):
218212
+ f"The service at {response.url} may be down or experiencing issues."
219213
)
220214

215+
# Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...)
216+
# so callers don't silently receive an HTML error page as if it were data.
217+
response.raise_for_status()
218+
221219
if response.text.startswith("No sites/data"):
222220
raise NoSitesError(response.url)
223221

tests/utils_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,37 @@ def test_header(self):
4141
assert response.status_code == 200 # GET was successful
4242
assert "user-agent" in response.request.headers
4343

44+
def test_does_not_mutate_caller_payload(self, requests_mock):
45+
"""`query` must not mutate the caller's payload dict.
46+
47+
Regression: previously the function did
48+
``payload[key] = to_str(value, delimiter)`` in place, so list
49+
values were overwritten with their stringified joins after the
50+
call returned.
51+
"""
52+
url = "https://example.com/svc"
53+
requests_mock.get(url, text="ok")
54+
payload = {"sites": ["A", "B"], "stateCd": "MD"}
55+
original = {k: v for k, v in payload.items()}
56+
57+
utils.query(url, payload)
58+
59+
assert payload == original
60+
assert payload["sites"] is original["sites"]
61+
62+
@pytest.mark.parametrize("status", [401, 403, 405, 408, 429, 501, 504])
63+
def test_unhandled_4xx_5xx_raises(self, requests_mock, status):
64+
"""`query` must surface every 4xx/5xx, not just the few it formats.
65+
66+
Regression: codes outside {400, 404, 414, 500, 502, 503} used to
67+
pass through silently — callers received the error body as if
68+
it were data.
69+
"""
70+
url = "https://example.com/svc"
71+
requests_mock.get(url, status_code=status, text="<html>denied</html>")
72+
with pytest.raises(Exception): # noqa: B017 -- HTTPError or ValueError
73+
utils.query(url, {"k": "v"})
74+
4475

4576
class Test_BaseMetadata:
4677
"""Tests of BaseMetadata"""

0 commit comments

Comments
 (0)