Skip to content

Commit b0a56f9

Browse files
committed
Revert "fix(waterdata): Force float64 dtype on physical-measurement columns"
This reverts commit 0f3b6ec.
1 parent 0f3b6ec commit b0a56f9

2 files changed

Lines changed: 1 addition & 31 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -896,15 +896,7 @@ def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
896896
df[col] = pd.to_datetime(df[col], errors="coerce")
897897

898898
for col in cols.intersection(numerical_cols):
899-
# ``pd.to_numeric`` infers ``int64`` when every value happens to
900-
# be integer-shaped (e.g. all-whole-CFS discharge values at a
901-
# small stream). These are physical measurements — discharge,
902-
# altitude, drainage area — and are continuous quantities by
903-
# nature, so force ``float64`` to avoid surprise arithmetic
904-
# surfaces (e.g. integer division on a chained subsequent
905-
# query) and to keep dtype stable across sites with mixed
906-
# whole vs fractional observations.
907-
df[col] = pd.to_numeric(df[col], errors="coerce").astype("float64")
899+
df[col] = pd.to_numeric(df[col], errors="coerce")
908900

909901
return df
910902

tests/waterdata_utils_test.py

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
_format_api_dates,
1313
_get_args,
1414
_handle_stats_nesting,
15-
_type_cols,
1615
_walk_pages,
1716
)
1817

@@ -293,27 +292,6 @@ def test_get_stats_data_warning_includes_next_token(caplog, monkeypatch):
293292
assert any("tok2" in m for m in warnings_), warnings_
294293

295294

296-
def test_type_cols_value_is_always_float_even_for_whole_numbers():
297-
"""Regression: ``pd.to_numeric`` infers ``int64`` when every value
298-
is integer-shaped. The USGS API returns whole-CFS discharge as
299-
string-typed integers at small streams (e.g. ``"12"`` for 12 cfs),
300-
so ``_type_cols`` would coerce ``value`` to ``int64`` and a
301-
downstream chained query mixing this site with another that has
302-
fractional readings would surface a confusing dtype upcast. Force
303-
``float64`` for every physical-measurement column."""
304-
df = pd.DataFrame(
305-
{
306-
"value": ["12", "15", "18", "14", "11"],
307-
"altitude": ["100", "200"] + [None] * 3,
308-
"drainage_area": ["5", "10", "15", "20", "25"],
309-
}
310-
)
311-
out = _type_cols(df)
312-
assert out["value"].dtype == "float64"
313-
assert out["altitude"].dtype == "float64"
314-
assert out["drainage_area"].dtype == "float64"
315-
316-
317295
def test_handle_stats_nesting_tolerates_missing_drop_columns():
318296
"""If the upstream stats response shape ever changes such that one of
319297
the columns we try to drop ("type", "properties.data") is absent, the

0 commit comments

Comments
 (0)