forked from DOI-USGS/dataretrieval-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwaterdata_utils_test.py
More file actions
202 lines (163 loc) · 6.08 KB
/
Copy pathwaterdata_utils_test.py
File metadata and controls
202 lines (163 loc) · 6.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
from unittest import mock
import pandas as pd
import requests
from dataretrieval.waterdata.utils import (
_get_args,
_walk_pages,
get_stats_data,
)
def test_get_args_basic():
    """Check the default filtering ``_get_args`` applies to a locals mapping.

    The input carries ``service``, ``output_id`` and a ``None``-valued
    entry; the expected result keeps only the two remaining populated keys.
    """
    caller_locals = dict(
        monitoring_location_id="123",
        service="daily",
        output_id="daily_id",
        none_val=None,
        other="val",
    )
    expected = {"monitoring_location_id": "123", "other": "val"}

    assert _get_args(caller_locals) == expected
def test_get_args_with_exclude():
    """Check that keys named in ``exclude`` are dropped by ``_get_args``.

    Besides ``service`` and ``output_id``, the explicitly excluded
    ``to_exclude`` key must be absent from the expected result.
    """
    caller_locals = dict(
        monitoring_location_id="123",
        service="daily",
        output_id="daily_id",
        to_exclude="secret",
        other="val",
    )
    excluded = {"to_exclude"}
    expected = {"monitoring_location_id": "123", "other": "val"}

    assert _get_args(caller_locals, exclude=excluded) == expected
def test_get_args_empty():
    """An empty mapping must come back from ``_get_args`` as an empty dict."""
    filtered = _get_args({})
    assert filtered == {}
def test_walk_pages_multiple_mocked():
    """Walk two mocked pages and check that their rows are combined in order.

    The first page is served through ``client.send`` and the second through
    ``client.request``; the test also checks that the follow-up request
    targets the page-2 URL advertised by the first page.
    """
    page2_url = "https://example.com/page2"

    # Page 1: one feature, with the next link advertised both in the JSON
    # body and in the response's parsed link headers.
    first_page = mock.MagicMock()
    first_page.status_code = 200
    first_page.headers = {}
    first_page.links = {"next": {"url": page2_url}}
    first_page.json.return_value = {
        "numberReturned": 1,
        "features": [{"id": "1", "properties": {"val": "a"}}],
        "links": [{"rel": "next", "href": page2_url}],
    }

    # Page 2: one feature and no next link anywhere, so pagination ends.
    last_page = mock.MagicMock()
    last_page.status_code = 200
    last_page.headers = {}
    last_page.links = {}
    last_page.json.return_value = {
        "numberReturned": 1,
        "features": [{"id": "2", "properties": {"val": "b"}}],
        "links": [],
    }

    # Session stand-in: send() yields page 1, request() yields page 2.
    session = mock.MagicMock(spec=requests.Session)
    session.send.return_value = first_page
    session.request.return_value = last_page

    # Prepared-request stand-in for the initial call.
    prepared = mock.MagicMock(spec=requests.PreparedRequest)
    prepared.url = "https://example.com/page1"
    prepared.method = "GET"
    prepared.headers = {}

    df, _ = _walk_pages(geopd=False, req=prepared, client=session)

    assert len(df) == 2
    assert df["val"].tolist() == ["a", "b"]
    assert df["id"].tolist() == ["1", "2"]
    assert session.send.called
    assert session.request.called
    # Second positional argument of the follow-up request is the URL.
    assert session.request.call_args.args[1] == page2_url
def test_walk_pages_raises_on_non_200_in_loop():
    """A non-200 continuation page stops pagination; earlier pages survive.

    Regression: previously any non-200 page was appended (with whatever
    body it had) and pagination quietly stopped because `_get_resp_data`
    or `_next_req_url` raised inside the bare except. The user got a
    partial result with no warning.

    The contract pinned here is "log + truncate", same as the pre-fix
    behavior, but the raise inside the loop now comes from an explicit
    status check rather than from an incidental parsing failure.
    """
    page2_url = "https://example.com/page2"

    resp1 = mock.MagicMock()
    resp1.json.return_value = {
        "numberReturned": 1,
        "features": [{"id": "1", "properties": {"val": "a"}}],
        # Advertise the next page in the body as well as in ``resp1.links``
        # (mirroring the multi-page test in this module) so the test does
        # not depend on which of the two ``_walk_pages`` consults.
        "links": [{"rel": "next", "href": page2_url}],
    }
    resp1.headers = {}
    resp1.links = {"next": {"url": page2_url}}
    resp1.status_code = 200

    # The continuation page fails with a server error and a non-JSON body.
    resp2 = mock.MagicMock()
    resp2.status_code = 500
    resp2.text = "<html>error</html>"

    mock_client = mock.MagicMock(spec=requests.Session)
    mock_client.send.return_value = resp1
    mock_client.request.return_value = resp2

    mock_req = mock.MagicMock(spec=requests.PreparedRequest)
    mock_req.method = "GET"
    mock_req.headers = {}
    mock_req.url = "https://example.com/page1"

    df, _ = _walk_pages(geopd=False, req=mock_req, client=mock_client)

    # Guard against a vacuous pass: the failing page must actually have been
    # requested, otherwise the status check in the loop was never exercised.
    assert mock_client.request.called
    assert mock_client.request.call_args[0][1] == page2_url

    # Page 1 is still returned; page 2 was logged and pagination stopped.
    assert len(df) == 1
    assert list(df["id"]) == ["1"]
# --- get_stats_data pagination ----------------------------------------------
def _stats_feature():
"""Build a single feature shaped to satisfy ``_handle_stats_nesting``."""
return {
"type": "Feature",
"id": "USGS-1",
"geometry": None,
"properties": {
"monitoring_location_id": "USGS-1",
"data": [
{
"parameter_code": "00060",
"unit_of_measure": "ft^3/s",
"parent_time_series_id": "abc",
"values": [{"value": 1.0}],
}
],
},
}
def _stats_body(features, next_token=None):
body = {
"type": "FeatureCollection",
"features": features,
"numberReturned": len(features),
}
if next_token is not None:
body["next"] = next_token
return body
def test_get_stats_data_handles_missing_next_key():
    """``get_stats_data`` must cope with a body that has no ``next`` key.

    Regression: ``body["next"]`` raised ``KeyError`` when the key was
    absent. The lookup is now ``body.get("next")``, so a missing key means
    "no more pages".
    """
    # Single successful page whose body carries no "next" key at all.
    single_page = mock.MagicMock()
    single_page.json.return_value = _stats_body([_stats_feature()])
    single_page.status_code = 200

    session = mock.MagicMock(spec=requests.Session)
    session.send.return_value = single_page

    df, _ = get_stats_data(
        args={},
        service="observationNormals",
        expand_percentiles=False,
        client=session,
    )

    assert isinstance(df, pd.DataFrame)
    assert len(df) >= 1
def test_get_stats_data_truncates_on_non_200_continuation():
    """A 4xx/5xx on a continuation page must log and stop, not crash."""
    # First page succeeds and points at a further page via its "next" token.
    first_page = mock.MagicMock()
    first_page.json.return_value = _stats_body(
        [_stats_feature()], next_token="abc"
    )
    first_page.status_code = 200

    # Continuation page fails with a server-side error.
    failed_page = mock.MagicMock()
    failed_page.url = "https://example.com/page2"
    failed_page.text = "Service Unavailable"
    failed_page.status_code = 503

    session = mock.MagicMock(spec=requests.Session)
    session.send.return_value = first_page
    session.request.return_value = failed_page

    df, _ = get_stats_data(
        args={},
        service="observationNormals",
        expand_percentiles=False,
        client=session,
    )

    # Page 1 still surfaces; page 2 was caught by the in-loop status check.
    assert isinstance(df, pd.DataFrame)
    assert len(df) >= 1