Skip to content

Commit 6e88cd6

Browse files
thodson-usgs and claude committed
refactor(waterdata.xarray): move CF vocabulary maps to types
The CF lookup tables (USGS units -> UDUNITS, statistic_id -> cell_methods operator, parameter_code -> standard_name) are plain reference data, so move them out of the converter module into types.py as public, extensible constants (CF_UNIT_MAP, CF_CELL_METHODS, CF_STANDARD_NAMES) alongside the existing PROFILE_LOOKUP.

They carry no xarray dependency, so types.py stays import-light and the tables can be extended without importing the xarray-optional converter. xarray.py imports them at the top; behavior is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 35f6e02 commit 6e88cd6

2 files changed

Lines changed: 53 additions & 47 deletions

File tree

dataretrieval/waterdata/types.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,3 +74,47 @@
7474
"count",
7575
],
7676
}
77+
78+
79+
# --- CF / xarray vocabulary mappings ---------------------------------------
80+
# Lookup tables used by :mod:`dataretrieval.waterdata.xarray` to translate
81+
# USGS terms into CF-conventions metadata. Each is intentionally partial:
82+
# anything not listed falls back to a sensible default (raw unit string kept
83+
# verbatim; no standard_name emitted) rather than guessing a wrong CF term.
84+
# They are plain data, so they live here rather than in the (xarray-optional)
85+
# converter module and can be extended without importing xarray.
86+
87+
# USGS unit strings -> UDUNITS / CF-canonical form.
88+
CF_UNIT_MAP = {
89+
"ft^3/s": "ft3 s-1",
90+
"ft3/s": "ft3 s-1",
91+
"ft": "ft",
92+
"in": "in",
93+
"degC": "degC",
94+
"deg C": "degC",
95+
"uS/cm": "uS/cm",
96+
"mg/l": "mg L-1",
97+
"mg/L": "mg L-1",
98+
"tons/day": "short_ton day-1",
99+
"%": "percent",
100+
}
101+
102+
# USGS statistic_id -> the operator in a CF ``cell_methods`` string.
103+
CF_CELL_METHODS = {
104+
"00001": "maximum",
105+
"00002": "minimum",
106+
"00003": "mean",
107+
"00006": "sum",
108+
"00008": "median",
109+
"00011": "point", # instantaneous
110+
}
111+
112+
# USGS 5-digit parameter code -> CF standard_name. Deliberately conservative;
113+
# codes without a confident match are left without a standard_name.
114+
CF_STANDARD_NAMES = {
115+
"00060": "water_volume_transport_in_river_channel",
116+
"00010": "water_temperature",
117+
"00065": "water_surface_height_above_reference_datum",
118+
"63160": "water_surface_height_above_reference_datum",
119+
"00045": "lwe_thickness_of_precipitation_amount",
120+
}

dataretrieval/waterdata/xarray.py

Lines changed: 9 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848

4949
from . import api as _api
5050
from .nearest import get_nearest_continuous as _get_nearest_continuous
51+
from .types import CF_CELL_METHODS, CF_STANDARD_NAMES, CF_UNIT_MAP
5152

5253
__all__ = [
5354
"get_continuous",
@@ -62,47 +63,10 @@
6263
]
6364

6465

65-
# --- CF mapping tables -----------------------------------------------------
66-
# Each is intentionally partial: anything not listed falls back to a sensible
67-
# default (raw unit string kept verbatim; no standard_name emitted) rather
68-
# than guessing and emitting a wrong CF term.
69-
70-
# USGS unit strings -> UDUNITS / CF-canonical form.
71-
_UDUNITS = {
72-
"ft^3/s": "ft3 s-1",
73-
"ft3/s": "ft3 s-1",
74-
"ft": "ft",
75-
"in": "in",
76-
"degC": "degC",
77-
"deg C": "degC",
78-
"uS/cm": "uS/cm",
79-
"mg/l": "mg L-1",
80-
"mg/L": "mg L-1",
81-
"tons/day": "short_ton day-1",
82-
"%": "percent",
83-
}
84-
85-
# USGS statistic_id -> the operator in a CF ``cell_methods`` string. Read
86-
# straight from the values frame, so no metadata round-trip is needed to
87-
# classify the aggregation.
88-
_STATISTIC_CELL_METHOD = {
89-
"00001": "maximum",
90-
"00002": "minimum",
91-
"00003": "mean",
92-
"00006": "sum",
93-
"00008": "median",
94-
"00011": "point", # instantaneous
95-
}
96-
97-
# USGS 5-digit parameter code -> CF standard_name. Deliberately conservative;
98-
# codes without a confident match are left without a standard_name.
99-
_STANDARD_NAME = {
100-
"00060": "water_volume_transport_in_river_channel",
101-
"00010": "water_temperature",
102-
"00065": "water_surface_height_above_reference_datum",
103-
"63160": "water_surface_height_above_reference_datum",
104-
"00045": "lwe_thickness_of_precipitation_amount",
105-
}
66+
# The CF vocabulary lookups (USGS units -> UDUNITS, statistic_id ->
67+
# cell_methods operator, parameter_code -> standard_name) are plain data and
68+
# live in ``types`` -- imported as CF_UNIT_MAP / CF_CELL_METHODS /
69+
# CF_STANDARD_NAMES at the top of this module.
10670

10771
# Columns kept off the value pivot but surfaced as ancillary (flag) variables.
10872
_ANCILLARY = ("qualifier", "approval_status")
@@ -179,19 +143,17 @@ def _var_attrs(desc, *, unit, pcode, stat, default_cell_method, ancillary, name)
179143
attrs["long_name"] = str(long_name)
180144

181145
if unit is not None and _pd.notna(unit):
182-
attrs["units"] = _UDUNITS.get(str(unit), str(unit))
146+
attrs["units"] = CF_UNIT_MAP.get(str(unit), str(unit))
183147

184148
op = (
185-
_STATISTIC_CELL_METHOD.get(str(stat))
186-
if stat is not None and _pd.notna(stat)
187-
else None
149+
CF_CELL_METHODS.get(str(stat)) if stat is not None and _pd.notna(stat) else None
188150
)
189151
op = op or default_cell_method
190152
if op:
191153
attrs["cell_methods"] = f"time: {op}"
192154

193155
if pcode is not None and _pd.notna(pcode):
194-
sn = _STANDARD_NAME.get(str(pcode))
156+
sn = CF_STANDARD_NAMES.get(str(pcode))
195157
if sn:
196158
attrs["standard_name"] = sn
197159
attrs["usgs_parameter_code"] = str(pcode)
@@ -289,7 +251,7 @@ def _build_timeseries(
289251

290252
name = _slug(desc.get("parameter_name") or pcode)
291253
if name in used: # same parameter, different statistic -> distinct var
292-
op = _STATISTIC_CELL_METHOD.get(str(stat)) or (str(stat) if stat else None)
254+
op = CF_CELL_METHODS.get(str(stat)) or (str(stat) if stat else None)
293255
name = f"{name}_{_slug(op)}" if op else name
294256
while name in used:
295257
name += "_x"

0 commit comments

Comments
 (0)