forked from DOI-USGS/dataretrieval-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwaterdata_utils_test.py
More file actions
159 lines (129 loc) · 4.85 KB
/
Copy pathwaterdata_utils_test.py
File metadata and controls
159 lines (129 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from unittest import mock
import pandas as pd
import requests
from dataretrieval.waterdata.utils import (
_arrange_cols,
_get_args,
_handle_stats_nesting,
_walk_pages,
)
def test_get_args_basic():
    """Check `_get_args` on a locals-style mapping.

    The expected output retains only ``monitoring_location_id`` and
    ``other``; the ``service``, ``output_id`` and ``None``-valued entries
    must be absent from the result.
    """
    captured_locals = dict(
        monitoring_location_id="123",
        service="daily",
        output_id="daily_id",
        none_val=None,
        other="val",
    )
    expected = {"monitoring_location_id": "123", "other": "val"}

    assert _get_args(captured_locals) == expected
def test_get_args_with_exclude():
    """Check that names passed via ``exclude`` are left out of the result.

    Besides the excluded ``to_exclude`` entry, the expected output also
    omits ``service`` and ``output_id``.
    """
    captured_locals = dict(
        monitoring_location_id="123",
        service="daily",
        output_id="daily_id",
        to_exclude="secret",
        other="val",
    )
    excluded_names = {"to_exclude"}
    expected = {"monitoring_location_id": "123", "other": "val"}

    assert _get_args(captured_locals, exclude=excluded_names) == expected
def test_get_args_empty():
    """An empty mapping must produce an empty argument dict."""
    outcome = _get_args({})
    assert outcome == {}
def test_walk_pages_multiple_mocked():
    """Walk two mocked pages and check the combined frame and follow-up call.

    The first page advertises a "next" link and the second does not. The
    test expects both features in the resulting frame, in page order, and
    expects the follow-up request to target the second page's URL.
    """
    first_url = "https://example.com/page1"
    second_url = "https://example.com/page2"

    def make_page(feature_id, val, next_url=None):
        # Build one mocked response; the "next" link is advertised both in
        # the JSON body and in the response's `links` attribute.
        page = mock.MagicMock()
        page.json.return_value = {
            "numberReturned": 1,
            "features": [{"id": feature_id, "properties": {"val": val}}],
            "links": [{"rel": "next", "href": next_url}] if next_url else [],
        }
        page.headers = {}
        page.links = {"next": {"url": next_url}} if next_url else {}
        page.status_code = 200
        return page

    page_one = make_page("1", "a", next_url=second_url)
    page_two = make_page("2", "b")

    # Session stand-in: send() is stubbed to hand back the first page and
    # request() the second.
    session = mock.MagicMock(spec=requests.Session)
    session.send.return_value = page_one
    session.request.return_value = page_two

    # Prepared-request stand-in pointing at the first page.
    prepared = mock.MagicMock(spec=requests.PreparedRequest)
    prepared.method = "GET"
    prepared.headers = {}
    prepared.url = first_url

    df, _ = _walk_pages(geopd=False, req=prepared, client=session)

    assert len(df) == 2
    assert list(df["val"]) == ["a", "b"]
    assert list(df["id"]) == ["1", "2"]

    assert session.send.called
    assert session.request.called
    # The URL is expected as the second positional argument of request().
    positional_args, _kwargs = session.request.call_args
    assert positional_args[1] == second_url
def test_handle_stats_nesting_tolerates_missing_drop_columns():
    """A stats payload lacking a to-be-dropped column must not raise.

    The function tries to drop the "type" and "properties.data" columns.
    The single feature built here carries no top-level "type" key, so if
    the upstream response shape changes in that way the call should still
    return a DataFrame rather than fail with a KeyError.
    """
    series_entry = {
        "parameter_code": "00060",
        "unit_of_measure": "ft^3/s",
        "parent_time_series_id": "ts-1",
        "values": [{"statistic_id": "mean", "value": 10.0}],
    }
    feature = {
        "properties": {
            "monitoring_location_id": "USGS-12345",
            "data": [series_entry],
        },
    }
    body = {"next": None, "features": [feature]}

    flattened = _handle_stats_nesting(body, geopd=False)

    assert len(flattened) == 1
    assert flattened["monitoring_location_id"].iloc[0] == "USGS-12345"
# --- _arrange_cols ----------------------------------------------------------
def test_arrange_cols_does_not_mutate_caller_properties():
    """The list passed as `properties` must be left untouched by `_arrange_cols`.

    Regression guard: an earlier version appended ``"geometry"`` to the
    list and overwrote its ``"id"`` entry with the output id in place, so
    the caller's list grew and was rewritten across successive calls.
    """
    columns = {
        "id": ["a", "b"],
        "value": [1.0, 2.0],
        "geometry": ["p1", "p2"],
    }
    frame = pd.DataFrame(columns)
    properties = ["id", "value"]
    snapshot = properties[:]

    # Call twice so that any in-place change would accumulate.
    for _ in range(2):
        _arrange_cols(frame, properties, output_id="daily_id")

    assert properties == snapshot, (
        f"caller's properties list was mutated: {properties!r} != {snapshot!r}"
    )
def test_arrange_cols_swaps_id_in_returned_columns():
    """Requesting `'id'` must surface the column under the output_id name."""
    frame = pd.DataFrame({"id": ["a"], "value": [1.0]})
    requested = ["id", "value"]

    arranged = _arrange_cols(frame, requested, output_id="daily_id")

    assert "daily_id" in arranged.columns
    assert "id" not in arranged.columns
def test_arrange_cols_keeps_geometry_when_present():
    """A geometry column in the frame is kept even when it was not requested."""
    frame = pd.DataFrame({"id": ["a"], "value": [1.0], "geometry": ["p1"]})
    requested = ["value"]

    arranged = _arrange_cols(frame, requested, output_id="daily_id")

    assert "geometry" in arranged.columns