Skip to content

Commit add49eb

Browse files
committed
Merge remote-tracking branch 'origin/main' into fix/wqp-metadata-site-info
# Conflicts:
#   tests/wqp_test.py
2 parents 3f03d36 + 98b3057 commit add49eb

5 files changed

Lines changed: 71 additions & 24 deletions

File tree

dataretrieval/nldi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ def _validate_data_source(data_source: str):
486486
def _validate_navigation_mode(navigation_mode: str):
487487
navigation_mode = navigation_mode.upper()
488488
if navigation_mode not in ("UM", "DM", "UT", "DD"):
489-
raise TypeError(f"Invalid navigation mode '{navigation_mode}'")
489+
raise ValueError(f"Invalid navigation mode '{navigation_mode}'")
490490

491491

492492
def _validate_feature_source_comid(

dataretrieval/waterdata/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,11 +892,13 @@ def _handle_stats_nesting(
892892
# otherwise return a geodataframe
893893
if not geopd:
894894
df = pd.json_normalize(body["features"]).drop(
895-
columns=["type", "properties.data"]
895+
columns=["type", "properties.data"], errors="ignore"
896896
)
897897
df.columns = df.columns.str.split(".").str[-1]
898898
else:
899-
df = gpd.GeoDataFrame.from_features(body["features"]).drop(columns=["data"])
899+
df = gpd.GeoDataFrame.from_features(body["features"]).drop(
900+
columns=["data"], errors="ignore"
901+
)
900902

901903
# Unnest json features, properties, data, and values while retaining necessary
902904
# metadata to merge with main dataframe.

dataretrieval/wqp.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -127,31 +127,23 @@ def get_results(
127127
kwargs = _check_kwargs(kwargs)
128128

129129
if legacy is True:
130-
if (
131-
"dataProfile" in kwargs
132-
and kwargs["dataProfile"] not in result_profiles_legacy
133-
):
134-
raise TypeError(
135-
f"dataProfile {kwargs['dataProfile']} is not a legacy profile.",
136-
f"Valid options are {result_profiles_legacy}.",
137-
)
138-
130+
valid_profiles = result_profiles_legacy
131+
kind = "legacy"
139132
url = wqp_url("Result")
140-
141133
else:
142-
if (
143-
"dataProfile" in kwargs
144-
and kwargs["dataProfile"] not in result_profiles_wqx3
145-
):
146-
raise TypeError(
147-
f"dataProfile {kwargs['dataProfile']} is not a valid WQX3.0"
148-
f"profile. Valid options are {result_profiles_wqx3}.",
149-
)
150-
else:
151-
kwargs["dataProfile"] = "fullPhysChem"
152-
134+
valid_profiles = result_profiles_wqx3
135+
kind = "WQX3.0"
153136
url = wqx3_url("Result")
154137

138+
profile = kwargs.get("dataProfile")
139+
if profile is not None and profile not in valid_profiles:
140+
raise ValueError(
141+
f"dataProfile {profile!r} is not a valid {kind} profile. "
142+
f"Valid options are {valid_profiles}."
143+
)
144+
if legacy is not True and profile is None:
145+
kwargs["dataProfile"] = "fullPhysChem"
146+
155147
response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)
156148

157149
df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

tests/waterdata_utils_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from dataretrieval.waterdata.utils import (
66
_get_args,
7+
_handle_stats_nesting,
78
_walk_pages,
89
)
910

@@ -80,3 +81,33 @@ def test_walk_pages_multiple_mocked():
8081
assert mock_client.send.called
8182
assert mock_client.request.called
8283
assert mock_client.request.call_args[0][1] == "https://example.com/page2"
84+
85+
86+
def test_handle_stats_nesting_tolerates_missing_drop_columns():
87+
"""If the upstream stats response shape ever changes such that one of
88+
the columns we try to drop ("type", "properties.data") is absent, the
89+
function should still return a DataFrame instead of raising KeyError.
90+
"""
91+
body = {
92+
"next": None,
93+
"features": [
94+
{
95+
"properties": {
96+
"monitoring_location_id": "USGS-12345",
97+
"data": [
98+
{
99+
"parameter_code": "00060",
100+
"unit_of_measure": "ft^3/s",
101+
"parent_time_series_id": "ts-1",
102+
"values": [{"statistic_id": "mean", "value": 10.0}],
103+
}
104+
],
105+
},
106+
}
107+
],
108+
}
109+
110+
df = _handle_stats_nesting(body, geopd=False)
111+
112+
assert len(df) == 1
113+
assert df["monitoring_location_id"].iloc[0] == "USGS-12345"

tests/wqp_test.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,3 +279,25 @@ def test_wqp_metadata_site_info_uses_wqx3_when_originating_query_was_wqx3(
279279
_df, md = get_results(legacy=False, siteid="UTAHDWQ_WQX-4993795")
280280
_site_df, site_md = md.site_info
281281
assert site_md.url == sites_wqx3_url
282+
283+
284+
def test_get_results_wqx3_preserves_user_dataProfile(requests_mock):
285+
"""A valid user-supplied WQX3.0 profile must not be overwritten.
286+
287+
Regression: previously the `else` branch of the `dataProfile` validation
288+
triggered whenever the value was *not invalid*, including any valid
289+
user-supplied profile, silently overwriting it with 'fullPhysChem'.
290+
"""
291+
request_url = (
292+
"https://www.waterqualitydata.us/wqx3/Result/search?"
293+
"siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=narrow"
294+
)
295+
response_file_path = "tests/data/wqp3_results.txt"
296+
mock_request(requests_mock, request_url, response_file_path)
297+
298+
df, _md = get_results(
299+
legacy=False, siteid="UTAHDWQ_WQX-4993795", dataProfile="narrow"
300+
)
301+
assert isinstance(df, DataFrame)
302+
sent = requests_mock.request_history[-1]
303+
assert sent.qs.get("dataprofile") == ["narrow"]

0 commit comments

Comments
 (0)