Skip to content

Commit 7caeffe

Browse files
thodson-usgs and claude committed
refactor(waterdata.xarray): simplify point-coords and naming dedup (/simplify)
- _point_coords: merge the explicit-longitude/latitude path and the geometry
  path into one dedup/loop/return scaffold with a swappable per-row extractor,
  and route the lon/lat float coercion through _lonlat instead of a second
  open-coded try/except (kills the duplicated branch).
- _DenseBuilder._disambiguate: replace the `name == base` proxy (a confusing
  stand-in for "no suffix") with an explicit "suffix didn't separate them"
  condition, and use collections.Counter for the base-name counts.

No behavior change; 67 tests pass, both spatial-coordinate paths verified live.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 0e26f61 commit 7caeffe

1 file changed

Lines changed: 24 additions & 26 deletions

File tree

dataretrieval/waterdata/xarray.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import re as _re
5858
import threading as _threading
5959
import warnings as _warnings
60+
from collections import Counter as _Counter
6061
from collections.abc import Callable
6162
from dataclasses import dataclass, field, replace
6263
from functools import wraps as _wraps
@@ -239,34 +240,31 @@ def _point_coords(df, site):
239240
explicit ``longitude`` / ``latitude`` columns (the Samples profile, mapped via
240241
:data:`_SAMPLES_RENAME`) -- so every service surfaces station coordinates.
241242
"""
243+
# Both sources reduce to a per-row "lonlat-able" value that _lonlat decodes
244+
# (a (lon, lat) tuple for the explicit columns, the geometry object
245+
# otherwise), so the dedup/loop/coercion scaffolding is shared.
242246
if {"longitude", "latitude"}.issubset(df.columns):
243-
geo = df.dropna(subset=["longitude", "latitude"]).drop_duplicates(site)
244-
if geo.empty:
245-
return None
246-
lon, lat = {}, {}
247-
for site_id, x, y in zip(
248-
geo[site].to_numpy(),
249-
geo["longitude"].to_numpy(),
250-
geo["latitude"].to_numpy(),
251-
):
252-
try:
253-
lon[site_id], lat[site_id] = float(x), float(y)
254-
except (TypeError, ValueError):
255-
continue
256-
return (lon, lat) if lon else None
257-
if "geometry" not in df.columns:
247+
subset = ["longitude", "latitude"]
248+
249+
def _geoms(g):
250+
return list(zip(g["longitude"].to_numpy(), g["latitude"].to_numpy()))
251+
elif "geometry" in df.columns:
252+
subset = ["geometry"]
253+
254+
def _geoms(g):
255+
return g["geometry"].to_numpy()
256+
else:
258257
return None
259-
geo = df.dropna(subset=["geometry"]).drop_duplicates(site)
258+
259+
geo = df.dropna(subset=subset).drop_duplicates(site)
260260
if geo.empty:
261261
return None
262262
lon, lat = {}, {}
263-
for site_id, geom in zip(geo[site].to_numpy(), geo["geometry"].to_numpy()):
264-
xy = _lonlat(geom)
263+
for site_id, geom in zip(geo[site].to_numpy(), _geoms(geo)):
264+
xy = _lonlat(geom) # skips non-point / unparseable rather than guessing
265265
if xy is not None:
266266
lon[site_id], lat[site_id] = xy
267-
if not lon:
268-
return None # no point geometry; skip rather than guess
269-
return lon, lat
267+
return (lon, lat) if lon else None
270268

271269

272270
def _prepare_values(df, group_cols, ancillary_cols):
@@ -1087,20 +1085,20 @@ def _disambiguate(bases, keys):
10871085
back to the statistic id then the parameter code -- so a bare name never
10881086
silently refers to an arbitrary one of several same-named series.
10891087
"""
1090-
counts: dict[str, int] = {}
1091-
for b in bases:
1092-
counts[b] = counts.get(b, 0) + 1
1088+
counts = _Counter(bases)
10931089
names, used = [], set()
10941090
for base, (pcode, stat) in zip(bases, keys):
10951091
if counts[base] == 1:
10961092
name = base
10971093
else:
1094+
# statistic cell-method (or raw id); if that doesn't yield a
1095+
# fresh name, fall back to the parameter code.
10981096
op = CF_CELL_METHODS.get(str(stat)) if stat is not None else None
10991097
suffix = op or (str(stat) if stat is not None else None)
11001098
name = f"{base}_{_slug(suffix)}" if suffix else base
1101-
if name == base or name in used: # statistic didn't separate them
1099+
if name in used or suffix is None:
11021100
name = f"{base}_{_slug(pcode)}" if pcode is not None else base
1103-
while name in used:
1101+
while name in used: # final guard: append until unique
11041102
name += "_x"
11051103
used.add(name)
11061104
names.append(name)

0 commit comments

Comments
 (0)