Skip to content

Commit c1e937a

Browse files
thodson-usgs and claude committed
Mark attach_datetime_columns as private and add type hints
The helper is purely an internal post-processing step inside get_samples / get_results — users have no reason to call it directly, and dataretrieval/__init__.py's `from dataretrieval.utils import *` was leaking it into the public API surface as `dataretrieval.attach_datetime_columns`. Underscore-prefix it and update the two call sites plus the unit tests.

Also add type hints to both helpers, matching the typing style already used in dataretrieval/waterdata/utils.py: _attach_datetime_columns is annotated pd.DataFrame → pd.DataFrame, and _build_utc_datetime is annotated (pd.Series, pd.Series, pd.Series) → pd.Series.

Addresses self-review of PR #272.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f198449 commit c1e937a

4 files changed

Lines changed: 16 additions & 14 deletions

File tree

dataretrieval/utils.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,9 @@ def format_datetime(df, date_field, time_field, tz_field):
9999
_TIME_TZ_SUFFIXES = (("Time", "TimeZone"), ("Time/Time", "Time/TimeZoneCode"))
100100

101101

102-
def _build_utc_datetime(date_series, time_series, tz_series):
102+
def _build_utc_datetime(
103+
date_series: pd.Series, time_series: pd.Series, tz_series: pd.Series
104+
) -> pd.Series:
103105
"""Combine date + time + tz-abbreviation columns into a UTC pandas Series.
104106
105107
Unknown timezone codes (and rows missing any of the three values) yield
@@ -118,7 +120,7 @@ def _build_utc_datetime(date_series, time_series, tz_series):
118120
)
119121

120122

121-
def attach_datetime_columns(df):
123+
def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
122124
"""Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets.
123125
124126
Detects two naming patterns that appear in USGS Samples and Water Quality

dataretrieval/waterdata/api.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
import requests
1717
from requests.models import PreparedRequest
1818

19-
from dataretrieval.utils import BaseMetadata, attach_datetime_columns, to_str
19+
from dataretrieval.utils import BaseMetadata, _attach_datetime_columns, to_str
2020
from dataretrieval.waterdata.filters import FILTER_LANG
2121
from dataretrieval.waterdata.types import (
2222
CODE_SERVICES,
@@ -2329,7 +2329,7 @@ def get_samples(
23292329
response.raise_for_status()
23302330

23312331
df = pd.read_csv(StringIO(response.text), delimiter=",")
2332-
df = attach_datetime_columns(df)
2332+
df = _attach_datetime_columns(df)
23332333

23342334
return df, BaseMetadata(response)
23352335

dataretrieval/wqp.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
import pandas as pd
1919

20-
from .utils import BaseMetadata, attach_datetime_columns, query
20+
from .utils import BaseMetadata, _attach_datetime_columns, query
2121

2222
if TYPE_CHECKING:
2323
from pandas import DataFrame
@@ -152,7 +152,7 @@ def get_results(
152152
response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)
153153

154154
df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
155-
df = attach_datetime_columns(df)
155+
df = _attach_datetime_columns(df)
156156
return df, WQP_Metadata(response)
157157

158158

tests/utils_test.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def test_to_str_non_iterable(self):
100100

101101

102102
class Test_attach_datetime_columns:
103-
"""Tests of attach_datetime_columns, which derives <prefix>DateTime UTC
103+
"""Tests of _attach_datetime_columns, which derives <prefix>DateTime UTC
104104
columns from Date/Time/TimeZone triplets in Samples and WQP CSVs."""
105105

106106
def test_wqx3_triplet_resolves_to_utc(self):
@@ -111,7 +111,7 @@ def test_wqx3_triplet_resolves_to_utc(self):
111111
"Activity_StartTimeZone": ["PST", "EST"],
112112
}
113113
)
114-
df = utils.attach_datetime_columns(df)
114+
df = utils._attach_datetime_columns(df)
115115
assert df["Activity_StartDateTime"][0] == pd.Timestamp(
116116
"2024-01-09 18:00:00", tz="UTC"
117117
)
@@ -128,7 +128,7 @@ def test_legacy_wqp_triplet_resolves_to_utc(self):
128128
"ActivityStartTime/TimeZoneCode": ["PST"],
129129
}
130130
)
131-
df = utils.attach_datetime_columns(df)
131+
df = utils._attach_datetime_columns(df)
132132
assert df["ActivityStartDateTime"][0] == pd.Timestamp(
133133
"2024-01-09 18:00:00", tz="UTC"
134134
)
@@ -141,7 +141,7 @@ def test_unknown_timezone_is_NaT(self):
141141
"Activity_StartTimeZone": ["BOGUS"],
142142
}
143143
)
144-
df = utils.attach_datetime_columns(df)
144+
df = utils._attach_datetime_columns(df)
145145
assert df["Activity_StartDateTime"].isna().all()
146146

147147
def test_missing_time_or_tz_is_NaT(self):
@@ -152,7 +152,7 @@ def test_missing_time_or_tz_is_NaT(self):
152152
"Activity_StartTimeZone": ["PST", "EST"],
153153
}
154154
)
155-
df = utils.attach_datetime_columns(df)
155+
df = utils._attach_datetime_columns(df)
156156
assert df["Activity_StartDateTime"][0] == pd.Timestamp(
157157
"2024-01-09 18:00:00", tz="UTC"
158158
)
@@ -167,7 +167,7 @@ def test_existing_datetime_column_not_overwritten(self):
167167
"Activity_StartDateTime": ["preexisting"],
168168
}
169169
)
170-
df = utils.attach_datetime_columns(df)
170+
df = utils._attach_datetime_columns(df)
171171
assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
172172

173173
def test_multiple_triplets_handled(self):
@@ -181,11 +181,11 @@ def test_multiple_triplets_handled(self):
181181
"LabInfo_AnalysisStartTimeZone": ["EST"],
182182
}
183183
)
184-
df = utils.attach_datetime_columns(df)
184+
df = utils._attach_datetime_columns(df)
185185
assert "Activity_StartDateTime" in df.columns
186186
assert "LabInfo_AnalysisStartDateTime" in df.columns
187187

188188
def test_lone_date_column_left_alone(self):
189189
df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
190-
df = utils.attach_datetime_columns(df)
190+
df = utils._attach_datetime_columns(df)
191191
assert list(df.columns) == ["LastChangeDate"]

0 commit comments

Comments
 (0)