Skip to content

Commit 1b534c9

Browse files
committed
Revert "fix(waterdata): Force float64 dtype on physical-measurement columns"
This reverts commit 5b6747f.
1 parent 5b6747f commit 1b534c9

2 files changed

Lines changed: 1 addition & 31 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1169,15 +1169,7 @@ def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
1169 1169
df[col] = pd.to_datetime(df[col], errors="coerce")
1170 1170

1171 1171
for col in cols.intersection(numerical_cols):
1172-
# ``pd.to_numeric`` infers ``int64`` when every value happens to
1173-
# be integer-shaped (e.g. all-whole-CFS discharge values at a
1174-
# small stream). These are physical measurements — discharge,
1175-
# altitude, drainage area — and are continuous quantities by
1176-
# nature, so force ``float64`` to avoid surprise arithmetic
1177-
# surfaces (e.g. integer division on a chained subsequent
1178-
# query) and to keep dtype stable across sites with mixed
1179-
# whole vs fractional observations.
1180-
df[col] = pd.to_numeric(df[col], errors="coerce").astype("float64")
1172+
df[col] = pd.to_numeric(df[col], errors="coerce")
1181 1173

1182 1174
return df
1183 1175

tests/waterdata_utils_test.py

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
16 16
_handle_stats_nesting,
17 17
_parse_retry_after,
18 18
_raise_for_non_200,
19-
_type_cols,
20 19
_walk_pages,
21 20
)
22 21

@@ -491,27 +490,6 @@ def test_get_resp_data_always_materializes_id_column():
491 490
# --- _arrange_cols ----------------------------------------------------------
492 491

493 492

494-
def test_type_cols_value_is_always_float_even_for_whole_numbers():
495-
"""Regression: ``pd.to_numeric`` infers ``int64`` when every value
496-
is integer-shaped. The USGS API returns whole-CFS discharge as
497-
string-typed integers at small streams (e.g. ``"12"`` for 12 cfs),
498-
so ``_type_cols`` would coerce ``value`` to ``int64`` and a
499-
downstream chained query mixing this site with another that has
500-
fractional readings would surface a confusing dtype upcast. Force
501-
``float64`` for every physical-measurement column."""
502-
df = pd.DataFrame(
503-
{
504-
"value": ["12", "15", "18", "14", "11"],
505-
"altitude": ["100", "200"] + [None] * 3,
506-
"drainage_area": ["5", "10", "15", "20", "25"],
507-
}
508-
)
509-
out = _type_cols(df)
510-
assert out["value"].dtype == "float64"
511-
assert out["altitude"].dtype == "float64"
512-
assert out["drainage_area"].dtype == "float64"
513-
514-
515 493
def test_arrange_cols_does_not_mutate_caller_properties():
516 494
"""`_arrange_cols` must not mutate the caller's `properties` list.
517 495

0 commit comments

Comments
 (0)