Skip to content

Commit 910522f

Browse files
thodson-usgsclaude
andcommitted
fix(waterdata): small coherence cleanups (annotations, column order, defensiveness)
Five small, low-risk fixes surfaced by the package review: 1. get_latest_continuous / get_latest_daily: `value` was annotated `int`, but every other getter (and the docstrings) use `str | Iterable[str]`; the `int` hint also rejected the multi-value list filtering the others advertise. 2. get_time_series_metadata: `thresholds` was annotated `int`, vs `float | list[float]` on get_combined_metadata for the same queryable. 3. _arrange_cols: the "move the synthetic per-record id column to the end" set was a hand-maintained literal that omitted peak_id, channel_measurements_id, combined_meta_id, and field_series_id, so those four getters left their id at the front instead of the end like daily_id. Derive the set from _OUTPUT_ID_BY_SERVICE (every output id except the user-facing monitoring_location_id and time_series_id) so it stays in sync and can't drift again when a service is added. 4. _next_req_url: returned a falsy `href` ("") instead of None, contradicting its Optional[str] contract. Return None. 5. _get_resp_data (geopandas branch): mirror the non-geopandas branch's `f.get("id")` so a feature missing a top-level id yields None rather than a KeyError. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ee653e5 commit 910522f

2 files changed

Lines changed: 18 additions & 14 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ def get_time_series_metadata(
768768
unit_of_measure: str | Iterable[str] | None = None,
769769
computation_period_identifier: str | Iterable[str] | None = None,
770770
computation_identifier: str | Iterable[str] | None = None,
771-
thresholds: int | None = None,
771+
thresholds: float | list[float] | None = None,
772772
sublocation_identifier: str | Iterable[str] | None = None,
773773
primary: str | Iterable[str] | None = None,
774774
parent_time_series_id: str | Iterable[str] | None = None,
@@ -1213,7 +1213,7 @@ def get_latest_continuous(
12131213
approval_status: str | Iterable[str] | None = None,
12141214
unit_of_measure: str | Iterable[str] | None = None,
12151215
qualifier: str | Iterable[str] | None = None,
1216-
value: int | None = None,
1216+
value: str | Iterable[str] | None = None,
12171217
last_modified: str | Iterable[str] | None = None,
12181218
skip_geometry: bool | None = None,
12191219
time: str | Iterable[str] | None = None,
@@ -1407,7 +1407,7 @@ def get_latest_daily(
14071407
approval_status: str | Iterable[str] | None = None,
14081408
unit_of_measure: str | Iterable[str] | None = None,
14091409
qualifier: str | Iterable[str] | None = None,
1410-
value: int | None = None,
1410+
value: str | Iterable[str] | None = None,
14111411
last_modified: str | Iterable[str] | None = None,
14121412
skip_geometry: bool | None = None,
14131413
time: str | Iterable[str] | None = None,

dataretrieval/waterdata/utils.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@
8686
"time-series-metadata": "time_series_id",
8787
}
8888

89+
# Every service's output id EXCEPT the two that are genuinely user-facing
90+
# (``monitoring_location_id`` and ``time_series_id``). The rest are synthetic
91+
# per-record ids that ``_arrange_cols`` moves to the end of a result frame.
92+
# Derived from ``_OUTPUT_ID_BY_SERVICE`` so adding a service can't silently
93+
# leave a stray id column at the front again.
94+
_EXTRA_ID_COLS = set(_OUTPUT_ID_BY_SERVICE.values()) - {
95+
"monitoring_location_id",
96+
"time_series_id",
97+
}
98+
8999

90100
def _switch_arg_id(ls: dict[str, Any], id_name: str, service: str):
91101
"""
@@ -803,7 +813,7 @@ def _next_req_url(
803813
continue
804814
href = link.get("href")
805815
if not href:
806-
return href
816+
return None
807817
# Refuse to follow a next-page link to a different host —
808818
# the request's headers/auth were minted for the original
809819
# host and shouldn't leak to whatever a poisoned response
@@ -905,7 +915,9 @@ def _get_resp_data(
905915

906916
# Organize json into geodataframe and make sure id column comes along.
907917
df = gpd.GeoDataFrame.from_features(features)
908-
df["id"] = pd.json_normalize(features)["id"].values
918+
# Mirror the non-geopandas branch's defensive ``f.get("id")`` so a feature
919+
# missing a top-level ``id`` yields None rather than a KeyError.
920+
df["id"] = [f.get("id") for f in features]
909921
df = df[["id"] + [col for col in df.columns if col != "id"]]
910922

911923
# If no geometry present, then return pandas dataframe. A geodataframe
@@ -1292,15 +1304,7 @@ def _arrange_cols(
12921304

12931305
# Move meaningless-to-user, extra id columns to the end
12941306
# of the dataframe, if they exist
1295-
extra_id_col = set(df.columns).intersection(
1296-
{
1297-
"latest_continuous_id",
1298-
"latest_daily_id",
1299-
"daily_id",
1300-
"continuous_id",
1301-
"field_measurement_id",
1302-
}
1303-
)
1307+
extra_id_col = set(df.columns).intersection(_EXTRA_ID_COLS)
13041308

13051309
# If the arbitrary id column is returned (either due to properties
13061310
# being none or NaN), then move it to the end of the dataframe, but

0 commit comments

Comments
 (0)