|
57 | 57 | import re as _re |
58 | 58 | import threading as _threading |
59 | 59 | import warnings as _warnings |
| 60 | +from collections import Counter as _Counter |
60 | 61 | from collections.abc import Callable |
61 | 62 | from dataclasses import dataclass, field, replace |
62 | 63 | from functools import wraps as _wraps |
@@ -239,34 +240,31 @@ def _point_coords(df, site): |
239 | 240 | explicit ``longitude`` / ``latitude`` columns (the Samples profile, mapped via |
240 | 241 | :data:`_SAMPLES_RENAME`) -- so every service surfaces station coordinates. |
241 | 242 | """ |
| 243 | + # Both sources reduce to a per-row "lonlat-able" value that _lonlat decodes |
| 244 | + # (a (lon, lat) tuple for the explicit columns, the geometry object |
| 245 | + # otherwise), so the dedup/loop/coercion scaffolding is shared. |
242 | 246 | if {"longitude", "latitude"}.issubset(df.columns): |
243 | | - geo = df.dropna(subset=["longitude", "latitude"]).drop_duplicates(site) |
244 | | - if geo.empty: |
245 | | - return None |
246 | | - lon, lat = {}, {} |
247 | | - for site_id, x, y in zip( |
248 | | - geo[site].to_numpy(), |
249 | | - geo["longitude"].to_numpy(), |
250 | | - geo["latitude"].to_numpy(), |
251 | | - ): |
252 | | - try: |
253 | | - lon[site_id], lat[site_id] = float(x), float(y) |
254 | | - except (TypeError, ValueError): |
255 | | - continue |
256 | | - return (lon, lat) if lon else None |
257 | | - if "geometry" not in df.columns: |
| 247 | + subset = ["longitude", "latitude"] |
| 248 | + |
| 249 | + def _geoms(g): |
| 250 | + return list(zip(g["longitude"].to_numpy(), g["latitude"].to_numpy())) |
| 251 | + elif "geometry" in df.columns: |
| 252 | + subset = ["geometry"] |
| 253 | + |
| 254 | + def _geoms(g): |
| 255 | + return g["geometry"].to_numpy() |
| 256 | + else: |
258 | 257 | return None |
259 | | - geo = df.dropna(subset=["geometry"]).drop_duplicates(site) |
| 258 | + |
| 259 | + geo = df.dropna(subset=subset).drop_duplicates(site) |
260 | 260 | if geo.empty: |
261 | 261 | return None |
262 | 262 | lon, lat = {}, {} |
263 | | - for site_id, geom in zip(geo[site].to_numpy(), geo["geometry"].to_numpy()): |
264 | | - xy = _lonlat(geom) |
| 263 | + for site_id, geom in zip(geo[site].to_numpy(), _geoms(geo)): |
| 264 | + xy = _lonlat(geom) # skips non-point / unparseable rather than guessing |
265 | 265 | if xy is not None: |
266 | 266 | lon[site_id], lat[site_id] = xy |
267 | | - if not lon: |
268 | | - return None # no point geometry; skip rather than guess |
269 | | - return lon, lat |
| 267 | + return (lon, lat) if lon else None |
270 | 268 |
|
271 | 269 |
|
272 | 270 | def _prepare_values(df, group_cols, ancillary_cols): |
@@ -1087,20 +1085,20 @@ def _disambiguate(bases, keys): |
1087 | 1085 | back to the statistic id then the parameter code -- so a bare name never |
1088 | 1086 | silently refers to an arbitrary one of several same-named series. |
1089 | 1087 | """ |
1090 | | - counts: dict[str, int] = {} |
1091 | | - for b in bases: |
1092 | | - counts[b] = counts.get(b, 0) + 1 |
| 1088 | + counts = _Counter(bases) |
1093 | 1089 | names, used = [], set() |
1094 | 1090 | for base, (pcode, stat) in zip(bases, keys): |
1095 | 1091 | if counts[base] == 1: |
1096 | 1092 | name = base |
1097 | 1093 | else: |
| 1094 | + # statistic cell-method (or raw id); if that doesn't yield a |
| 1095 | + # fresh name, fall back to the parameter code. |
1098 | 1096 | op = CF_CELL_METHODS.get(str(stat)) if stat is not None else None |
1099 | 1097 | suffix = op or (str(stat) if stat is not None else None) |
1100 | 1098 | name = f"{base}_{_slug(suffix)}" if suffix else base |
1101 | | - if name == base or name in used: # statistic didn't separate them |
| 1099 | + if name in used or suffix is None: |
1102 | 1100 | name = f"{base}_{_slug(pcode)}" if pcode is not None else base |
1103 | | - while name in used: |
| 1101 | + while name in used: # final guard: append until unique |
1104 | 1102 | name += "_x" |
1105 | 1103 | used.add(name) |
1106 | 1104 | names.append(name) |
|
0 commit comments