Skip to content

Commit 3274d91

Browse files
thodson-usgsclaude
andcommitted
fix(waterdata): repair queryables fallback, scalar properties, sediment unit
Review fixes for the hash-drop path: * get_ogc_data's queryables-fallback except caught requests.HTTPError / requests.RequestException, but the module imports only httpx (no requests import), so any queryables failure raised NameError instead of falling back. Catch (httpx.HTTPError, RuntimeError, ValueError) -- the types the path actually raises (RateLimited/ServiceUnavailable subclass RuntimeError). * Guard _service_queryables against a non-dict 200 body so a malformed response falls back (caught ValueError) rather than escaping as AttributeError. * Normalize a scalar-string `properties` to a list at the get_ogc_data boundary, so get_reference_table's raw query dict can't trip `all(pd.isna("..."))` (TypeError) or a per-character set in _arrange_cols. * CF_UNIT_MAP: "tons/day" -> "ton day-1" ("short_ton" is not a valid UDUNITS name; UDUNITS "ton" is the US short ton). Add regression tests for the queryables fallback and scalar-properties paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c58e96e commit 3274d91

3 files changed

Lines changed: 64 additions & 7 deletions

File tree

dataretrieval/waterdata/types.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@
9595
"uS/cm": "uS/cm",
9696
"mg/l": "mg L-1",
9797
"mg/L": "mg L-1",
98-
"tons/day": "short_ton day-1",
98+
# UDUNITS 'ton' is the US short ton; 'short_ton' is not a valid UDUNITS name.
99+
"tons/day": "ton day-1",
99100
"%": "percent",
100101
}
101102

dataretrieval/waterdata/utils.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -245,15 +245,19 @@ def _service_queryables(service: str) -> list[str]:
245245
"""Return the cached queryables property list for ``service``.
246246
247247
One HTTP GET per service per process; the list is reused for every
248-
subsequent call. Raises ``requests.HTTPError`` on a non-200 — the
249-
caller's ``include_hash=False`` request can't be satisfied
250-
without it, so failing loudly is preferable to silently dropping
251-
the server-side trim.
248+
subsequent call. Raises on a non-200 (a ``RuntimeError`` subclass via
249+
``_raise_for_non_200``, or an ``httpx.HTTPError`` on a transport failure);
250+
``get_ogc_data`` catches those and falls back to the client-side drop.
252251
"""
253252
cached = _queryables_cache.get(service)
254253
if cached is not None:
255254
return cached
256255
body = _check_ogc_requests(endpoint=service, req_type="queryables")
256+
if not isinstance(body, dict):
257+
# A 200 with a non-object body would make ``body.get`` raise
258+
# AttributeError, which get_ogc_data's fallback doesn't catch; raise a
259+
# caught type so a malformed response falls back to the client-side drop.
260+
raise ValueError(f"queryables response for {service!r} was not a JSON object")
257261
props = list(body.get("properties", {}).keys())
258262
_queryables_cache[service] = props
259263
return props
@@ -1427,8 +1431,14 @@ def get_ogc_data(
14271431
args["service"] = service
14281432
args = _switch_arg_id(args, id_name=output_id, service=service)
14291433
# Capture `properties` before the id-switch so post-processing sees
1430-
# the user-facing names, not the wire-format ones.
1434+
# the user-facing names, not the wire-format ones. A scalar string
1435+
# (e.g. from ``get_reference_table``'s raw ``query`` dict, which skips
1436+
# the list-normalization the typed getters do) is wrapped to a list so
1437+
# the downstream predicates / set-builds treat it as one column rather
1438+
# than per-character.
14311439
properties = args.get("properties")
1440+
if isinstance(properties, str):
1441+
properties = [properties]
14321442
convert_type = args.pop("convert_type", False)
14331443
include_hash = args.pop("include_hash", False)
14341444

@@ -1441,7 +1451,7 @@ def get_ogc_data(
14411451
if use_server_trim:
14421452
try:
14431453
args["properties"] = _default_non_hash_properties(service, output_id)
1444-
except (requests.HTTPError, requests.RequestException, ValueError) as exc:
1454+
except (httpx.HTTPError, RuntimeError, ValueError) as exc:
14451455
logger.warning(
14461456
"Could not fetch queryables for %s (%s); "
14471457
"falling back to client-side hash-ID drop.",

tests/waterdata_utils_test.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,3 +927,49 @@ def test_check_ogc_requests_raises_typed_on_5xx(httpx_mock):
927927
)
928928
with pytest.raises(ServiceUnavailable):
929929
_check_ogc_requests(endpoint="daily", req_type="schema")
930+
931+
932+
def _reach_fetch_sentinel(monkeypatch):
933+
"""Stub ``_fetch_once`` to raise a recognizable sentinel, so a test can
934+
prove ``get_ogc_data`` got past the properties/queryables handling (rather
935+
than crashing earlier) without doing any network I/O."""
936+
937+
def _boom(_args):
938+
raise RuntimeError("reached _fetch_once")
939+
940+
monkeypatch.setattr(_utils_module, "_fetch_once", _boom)
941+
942+
943+
def test_get_ogc_data_falls_back_when_queryables_unavailable(monkeypatch):
944+
# A queryables fetch failure (ServiceUnavailable / httpx error) must be
945+
# caught and fall through to the client-side drop -- not crash. Regression:
946+
# the except clause referenced an unimported ``requests``, so any failure
947+
# raised NameError instead of falling back.
948+
def _boom_queryables(*args, **kwargs):
949+
raise ServiceUnavailable("503")
950+
951+
monkeypatch.setattr(_utils_module, "_default_non_hash_properties", _boom_queryables)
952+
_reach_fetch_sentinel(monkeypatch)
953+
# Reaching the sentinel proves the queryables error was caught and the call
954+
# continued; a NameError or the bare ServiceUnavailable("503") would not
955+
# match and would fail the test.
956+
with pytest.raises(RuntimeError, match="reached _fetch_once"):
957+
_utils_module.get_ogc_data(
958+
args={"monitoring_location_id": "USGS-1"},
959+
output_id="daily_id",
960+
service="daily",
961+
)
962+
963+
964+
def test_get_ogc_data_accepts_scalar_properties(monkeypatch):
965+
# A scalar (non-list) ``properties`` -- reachable via get_reference_table's
966+
# raw query dict -- must be treated as a single column, not crash
967+
# ``_properties_unspecified`` (all(pd.isna("time")) -> TypeError) or become
968+
# a per-character set in ``_arrange_cols``.
969+
_reach_fetch_sentinel(monkeypatch)
970+
with pytest.raises(RuntimeError, match="reached _fetch_once"):
971+
_utils_module.get_ogc_data(
972+
args={"properties": "time", "monitoring_location_id": "USGS-1"},
973+
output_id="daily_id",
974+
service="daily",
975+
)

0 commit comments

Comments
 (0)