1- import sys
1+ from datetime import timedelta
2+ from types import SimpleNamespace
23from unittest import mock
34from urllib .parse import parse_qs , urlsplit
45
6+ import pandas as pd
57import pytest
68import requests
79
1416 _walk_pages ,
1517)
1618
17- OGC_CONTINUOUS_URL = (
18- "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items"
19- )
20-
2119
2220def _query_params (prepared_request ):
2321 return parse_qs (urlsplit (prepared_request .url ).query )
@@ -205,11 +203,7 @@ def test_construct_filter_on_all_ogc_services(service):
205203 assert qs ["filter-lang" ] == ["cql-text" ]
206204
207205
208- @pytest .mark .skipif (
209- sys .version_info < (3 , 10 ),
210- reason = "get_continuous requires py>=3.10 (see tests/waterdata_test.py)" ,
211- )
212- def test_long_filter_fans_out_into_multiple_requests (requests_mock ):
206+ def test_long_filter_fans_out_into_multiple_requests ():
213207 """An oversized top-level OR filter triggers multiple HTTP requests
214208 whose results are concatenated."""
215209 from dataretrieval .waterdata import get_continuous
@@ -221,49 +215,45 @@ def test_long_filter_fans_out_into_multiple_requests(requests_mock):
221215 expr = " OR " .join (clause .format (day = (i % 28 ) + 1 ) for i in range (300 ))
222216 assert len (expr ) > _CQL_FILTER_CHUNK_LEN
223217
224- call_count = {"n" : 0 }
225-
226- def respond (request , context ):
227- context .status_code = 200
228- call_count ["n" ] += 1
229- return {
230- "type" : "FeatureCollection" ,
231- "numberReturned" : 1 ,
232- "features" : [
233- {
234- "type" : "Feature" ,
235- "id" : f"chunk-{ call_count ['n' ]} " ,
236- "geometry" : None ,
237- "properties" : {"value" : call_count ["n" ]},
238- }
239- ],
240- "links" : [],
241- }
242-
243- requests_mock .get (OGC_CONTINUOUS_URL , json = respond )
244-
245- df , _ = get_continuous (
246- monitoring_location_id = "USGS-07374525" ,
247- parameter_code = "72255" ,
248- filter = expr ,
249- filter_lang = "cql-text" ,
250- )
218+ sent_filters = []
219+
220+ def fake_construct_api_requests (** kwargs ):
221+ sent_filters .append (kwargs .get ("filter" ))
222+ return SimpleNamespace (url = "https://example.test" , method = "GET" , headers = {})
223+
224+ def fake_walk_pages (* _args , ** _kwargs ):
225+ idx = len (sent_filters )
226+ frame = pd .DataFrame ({"id" : [f"chunk-{ idx } " ], "value" : [idx ]})
227+ resp = SimpleNamespace (
228+ url = "https://example.test" ,
229+ elapsed = timedelta (milliseconds = 1 ),
230+ headers = {},
231+ )
232+ return frame , resp
233+
234+ with mock .patch (
235+ "dataretrieval.waterdata.utils._construct_api_requests" ,
236+ side_effect = fake_construct_api_requests ,
237+ ), mock .patch (
238+ "dataretrieval.waterdata.utils._walk_pages" , side_effect = fake_walk_pages
239+ ):
240+ df , _ = get_continuous (
241+ monitoring_location_id = "USGS-07374525" ,
242+ parameter_code = "72255" ,
243+ filter = expr ,
244+ filter_lang = "cql-text" ,
245+ )
251246
252247 # Mirror the library's splitter so the test doesn't hardcode a chunk count.
253248 expected_chunks = _chunk_cql_or (expr )
254249 assert len (expected_chunks ) > 1
255- assert call_count ["n" ] == len (expected_chunks )
250+ assert len (sent_filters ) == len (expected_chunks )
251+ assert sent_filters == expected_chunks
256252 assert len (df ) == len (expected_chunks )
257- for req in requests_mock .request_history :
258- filter_qs = parse_qs (urlsplit (req .url ).query ).get ("filter" , ["" ])[0 ]
259- assert len (filter_qs ) <= _CQL_FILTER_CHUNK_LEN
253+ assert all (len (chunk ) <= _CQL_FILTER_CHUNK_LEN for chunk in sent_filters )
260254
261255
262- @pytest .mark .skipif (
263- sys .version_info < (3 , 10 ),
264- reason = "get_continuous requires py>=3.10 (see tests/waterdata_test.py)" ,
265- )
266- def test_long_filter_deduplicates_cross_chunk_overlap (requests_mock ):
256+ def test_long_filter_deduplicates_cross_chunk_overlap ():
267257 """Features returned by multiple chunks (same feature `id`) are
268258 deduplicated in the concatenated result."""
269259 from dataretrieval .waterdata import get_continuous
@@ -276,36 +266,69 @@ def test_long_filter_deduplicates_cross_chunk_overlap(requests_mock):
276266
277267 call_count = {"n" : 0 }
278268
279- def respond (request , context ):
280- context .status_code = 200
269+ def fake_walk_pages (* _args , ** _kwargs ):
281270 call_count ["n" ] += 1
282- # Every chunk returns the same feature id so dedup should collapse
283- # the concatenated frame down to a single row.
284- return {
285- "type" : "FeatureCollection" ,
286- "numberReturned" : 1 ,
287- "features" : [
288- {
289- "type" : "Feature" ,
290- "id" : "shared-feature" ,
291- "geometry" : None ,
292- "properties" : {"value" : 1 },
293- }
294- ],
295- "links" : [],
296- }
297-
298- requests_mock .get (OGC_CONTINUOUS_URL , json = respond )
299-
300- df , _ = get_continuous (
301- monitoring_location_id = "USGS-07374525" ,
302- parameter_code = "72255" ,
303- filter = expr ,
304- filter_lang = "cql-text" ,
305- )
271+ frame = pd .DataFrame ({"id" : ["shared-feature" ], "value" : [1 ]})
272+ resp = SimpleNamespace (
273+ url = "https://example.test" ,
274+ elapsed = timedelta (milliseconds = 1 ),
275+ headers = {},
276+ )
277+ return frame , resp
278+
279+ with mock .patch (
280+ "dataretrieval.waterdata.utils._construct_api_requests" ,
281+ return_value = SimpleNamespace (
282+ url = "https://example.test" , method = "GET" , headers = {}
283+ ),
284+ ), mock .patch (
285+ "dataretrieval.waterdata.utils._walk_pages" , side_effect = fake_walk_pages
286+ ):
287+ df , _ = get_continuous (
288+ monitoring_location_id = "USGS-07374525" ,
289+ parameter_code = "72255" ,
290+ filter = expr ,
291+ filter_lang = "cql-text" ,
292+ )
306293
307294 expected_chunks = _chunk_cql_or (expr )
308295 assert len (expected_chunks ) > 1
309296 assert call_count ["n" ] == len (expected_chunks )
310297 # Even though each chunk returned a feature, dedup by id collapses them.
311298 assert len (df ) == 1
299+
300+
301+ def test_cql_json_filter_is_not_chunked ():
302+ """Chunking applies only to cql-text; cql-json is passed through unchanged."""
303+ from dataretrieval .waterdata import get_continuous
304+
305+ clause = "(time >= '2023-01-01T00:00:00Z' AND time <= '2023-01-01T00:30:00Z')"
306+ expr = " OR " .join ([clause ] * 300 )
307+ sent_filters = []
308+
309+ def fake_construct_api_requests (** kwargs ):
310+ sent_filters .append (kwargs .get ("filter" ))
311+ return SimpleNamespace (url = "https://example.test" , method = "GET" , headers = {})
312+
313+ with mock .patch (
314+ "dataretrieval.waterdata.utils._construct_api_requests" ,
315+ side_effect = fake_construct_api_requests ,
316+ ), mock .patch (
317+ "dataretrieval.waterdata.utils._walk_pages" ,
318+ return_value = (
319+ pd .DataFrame ({"id" : ["row-1" ], "value" : [1 ]}),
320+ SimpleNamespace (
321+ url = "https://example.test" ,
322+ elapsed = timedelta (milliseconds = 1 ),
323+ headers = {},
324+ ),
325+ ),
326+ ):
327+ get_continuous (
328+ monitoring_location_id = "USGS-07374525" ,
329+ parameter_code = "72255" ,
330+ filter = expr ,
331+ filter_lang = "cql-json" ,
332+ )
333+
334+ assert sent_filters == [expr ]
0 commit comments