Skip to content

Commit 3550374

Browse files
authored
fix: handle non-JSON error response bodies in _error_body (#242) (#274)
1 parent 0f90912 commit 3550374

2 files changed

Lines changed: 75 additions & 6 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ def _check_ogc_requests(endpoint: str = "daily", req_type: str = "queryables"):
335335

336336
def _error_body(resp: requests.Response):
337337
"""
338-
Provide more informative error messages based on the response status.
338+
Build an informative error message from an HTTP response.
339339
340340
Parameters
341341
----------
@@ -345,10 +345,19 @@ def _error_body(resp: requests.Response):
345345
Returns
346346
-------
347347
str
348-
The extracted error message. For status code 429, returns the 'message'
349-
field from the JSON error object. For status code 403, returns a
350-
predefined message indicating possible reasons for denial. For other
351-
status codes, returns the raw response text.
348+
An error message string assembled per status code:
349+
350+
* **429** — predefined message describing the rate-limit and pointing
351+
at the API-token path; the response body is not consulted.
352+
* **403** — predefined message describing the most common cause
353+
(query exceeding server limits); the response body is not
354+
consulted.
355+
* **other statuses** — attempts ``resp.json()`` and renders
356+
``"<status>: <code>. <description>."`` from the JSON error
357+
envelope. If the body is not JSON (e.g. an HTML 502 from a
358+
gateway), falls back to ``"<status>: <reason>. <snippet>"`` with
359+
the first 200 characters of ``resp.text``; an empty body
360+
degrades to ``"<status>: <reason>."``.
352361
"""
353362
status = resp.status_code
354363
if status == 429:
@@ -361,7 +370,14 @@ def _error_body(resp: requests.Response):
361370
"403: Query request denied. Possible reasons include "
362371
"query exceeding server limits."
363372
)
364-
j_txt = resp.json()
373+
try:
374+
j_txt = resp.json()
375+
except ValueError:
376+
snippet = (resp.text or "").strip()[:200]
377+
reason = resp.reason or "Error"
378+
if snippet:
379+
return f"{status}: {reason}. {snippet}"
380+
return f"{status}: {reason}."
365381
return (
366382
f"{status}: {j_txt.get('code', 'Unknown type')}. "
367383
f"{j_txt.get('description', 'No description provided')}."

tests/waterdata_utils_test.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from dataretrieval.waterdata.utils import (
77
_arrange_cols,
8+
_error_body,
89
_format_api_dates,
910
_get_args,
1011
_handle_stats_nesting,
@@ -221,3 +222,55 @@ def test_format_api_dates_open_ended_range_with_none():
221222
"""A None / NaN endpoint becomes '..' in the output range."""
222223
assert _format_api_dates(["2024-01-01", None], date=True) == "2024-01-01/.."
223224
assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01"
225+
226+
227+
def _make_response(status, body, reason=None, content_type="text/html"):
228+
resp = requests.Response()
229+
resp.status_code = status
230+
resp.reason = reason
231+
resp._content = body.encode("utf-8")
232+
resp.headers["Content-Type"] = content_type
233+
return resp
234+
235+
236+
def test_error_body_handles_non_json_html_response():
237+
"""A non-JSON 502 HTML body must be summarized, not raise JSONDecodeError."""
238+
html = (
239+
"<html>\r\n<head><title>502 Bad Gateway</title></head>"
240+
"<body><center><h1>502 Bad Gateway</h1></center><hr>"
241+
"<center>openresty</center></body></html>"
242+
)
243+
resp = _make_response(502, html, reason="Bad Gateway")
244+
msg = _error_body(resp)
245+
assert "502" in msg
246+
assert "Bad Gateway" in msg
247+
248+
249+
def test_error_body_handles_empty_response_body():
250+
"""An empty error body returns a status/reason message without crashing."""
251+
resp = _make_response(500, "", reason="Internal Server Error")
252+
msg = _error_body(resp)
253+
assert msg == "500: Internal Server Error."
254+
255+
256+
def test_error_body_truncates_long_non_json_body():
257+
"""Non-JSON bodies are truncated to 200 chars to keep the message readable."""
258+
body = ("x" * 200) + "Y" + ("z" * 299)
259+
resp = _make_response(502, body, reason="Bad Gateway")
260+
msg = _error_body(resp)
261+
assert "x" * 200 in msg
262+
assert (("x" * 200) + "Y") not in msg
263+
264+
265+
def test_error_body_still_parses_well_formed_json():
266+
"""JSON error bodies continue to render code/description fields."""
267+
resp = _make_response(
268+
400,
269+
'{"code": "BadRequest", "description": "missing parameter"}',
270+
reason="Bad Request",
271+
content_type="application/json",
272+
)
273+
msg = _error_body(resp)
274+
assert "400" in msg
275+
assert "BadRequest" in msg
276+
assert "missing parameter" in msg

0 commit comments

Comments
 (0)