Skip to content

Commit 15d3789

Browse files
thodson-usgs and claude committed
fix(waterdata.xarray): drop time_series_id from the flat stats dataset
The stats flat conversion keeps every column, and _handle_stats_nesting surfaces all outer feature-property keys, so a time_series_id could leak into the CF dataset as an opaque-UUID variable. Add it to _build_stats's drop set (alongside computation_id / parent_time_series_id), matching the coverage of the removed pandas-path hash drop. The test now asserts that all three are dropped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f1755e7 commit 15d3789

2 files changed

Lines changed: 4 additions & 3 deletions

File tree

dataretrieval/waterdata/xarray.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def _build_stats(df, base_meta, service):
711711
# opaque hash IDs never reach those datasets. This flat path keeps every
712712
# column, so drop the stats service's hash-valued IDs (and geometry) here to
713713
# keep the CF dataset free of per-record UUID coordinates.
714-
drop = ("geometry", "computation_id", "parent_time_series_id")
714+
drop = ("geometry", "computation_id", "parent_time_series_id", "time_series_id")
715715
flat = df.drop(columns=[c for c in drop if c in df.columns])
716716
ds = _xr.Dataset.from_dataframe(flat.reset_index(drop=True))
717717
ds.attrs = _dataset_attrs(service, base_meta)

tests/waterdata_xarray_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,12 +181,13 @@ def test_build_stats_drops_hash_columns():
181181
"parameter_code": ["00060"],
182182
"computation_id": ["7d70379f-8452-44cd-b026-24dfa11f8503"],
183183
"parent_time_series_id": ["9cca880dec4846ec8cbdd05f3e22603e"],
184+
"time_series_id": ["b026-24dfa11f8503"],
184185
"p50_va": [120.0],
185186
}
186187
)
187188
ds = wdx._build_stats(df, _meta(), "statistics")
188-
assert "computation_id" not in ds.variables
189-
assert "parent_time_series_id" not in ds.variables
189+
for hash_col in ("computation_id", "parent_time_series_id", "time_series_id"):
190+
assert hash_col not in ds.variables
190191
assert "p50_va" in ds.data_vars
191192

192193

0 commit comments

Comments
 (0)