Skip to content

Commit 5135bb5

Browse files
thodson-usgsclaude
andcommitted
refactor(waterdata): emit the geopandas advisory once at import, not per call
Geopandas availability is a static, environment-level fact, so warn once at module import (right where GEOPANDAS is determined) instead of inside the getters/_paginate. This removes the per-call _warn_geopandas_once() latch and guarantees the advisory prints exactly once — above any progress line — without repeating per query/chunk or interleaving with the status line. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 37a6b86 commit 5135bb5

2 files changed

Lines changed: 25 additions & 79 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -40,29 +40,22 @@
4040
# Set up logger for this module
4141
logger = logging.getLogger(__name__)
4242

43+
# Whether geopandas is present is a static, environment-level fact, so warn once
44+
# here at import time rather than per query/chunk. That avoids the warning
45+
# repeating on every call and avoids it interleaving with the progress line's
46+
# carriage-return rewrites.
47+
if not GEOPANDAS:
48+
logger.warning(
49+
"Geopandas not installed. Geometries will be flattened into pandas DataFrames."
50+
)
51+
4352
BASE_URL = "https://api.waterdata.usgs.gov"
4453
OGC_API_VERSION = "v0"
4554
OGC_API_URL = f"{BASE_URL}/ogcapi/{OGC_API_VERSION}"
4655
SAMPLES_URL = f"{BASE_URL}/samples-data"
4756
STATISTICS_API_VERSION = "v0"
4857
STATISTICS_API_URL = f"{BASE_URL}/statistics/{STATISTICS_API_VERSION}"
4958

50-
# "Geopandas not installed" is a static, environment-level fact. Emit it at most
51-
# once per process (and before any progress line opens) so it neither repeats
52-
# per query/chunk nor interleaves with the status line's carriage-return rewrites.
53-
_geopandas_warned = False
54-
55-
56-
def _warn_geopandas_once() -> None:
57-
"""Warn (once per process) that geometries are flattened without geopandas."""
58-
global _geopandas_warned
59-
if not GEOPANDAS and not _geopandas_warned:
60-
_geopandas_warned = True
61-
logger.warning(
62-
"Geopandas not installed. Geometries will be flattened "
63-
"into pandas DataFrames."
64-
)
65-
6659

6760
def _switch_arg_id(ls: dict[str, Any], id_name: str, service: str):
6861
"""
@@ -1257,7 +1250,6 @@ def get_ogc_data(
12571250
convert_type = args.pop("convert_type", False)
12581251
args = {k: v for k, v in args.items() if v is not None}
12591252

1260-
_warn_geopandas_once() # before the progress line, so it never interleaves
12611253
with _progress.progress_context(service=service):
12621254
return_list, response = _fetch_once(args)
12631255
return_list = _deal_with_empty(return_list, properties, service)
@@ -1332,9 +1324,8 @@ def _handle_stats_nesting(
13321324
if not features:
13331325
return gpd.GeoDataFrame() if geopd else pd.DataFrame()
13341326

1335-
# The geopd-missing warning is emitted once per process by
1336-
# ``_warn_geopandas_once`` at the getter entry; doing it here would log
1337-
# per page.
1327+
# The geopd-missing warning is emitted once at import (see top of module);
1328+
# doing it here would log per page.
13381329
if not geopd:
13391330
outer_props = [
13401331
{k: v for k, v in (f.get("properties") or {}).items() if k != "data"}
@@ -1505,7 +1496,6 @@ def follow_up(cursor: str, sess: requests.Session) -> requests.Response:
15051496

15061497
# The stats path doesn't go through ``multi_value_chunked``, so it opens
15071498
# its own progress context; ``_paginate`` reports pages/rate-limit into it.
1508-
_warn_geopandas_once() # before the progress line, so it never interleaves
15091499
with _progress.progress_context(service=service):
15101500
df, response = _paginate(
15111501
req,

tests/waterdata_utils_test.py

Lines changed: 14 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -406,66 +406,22 @@ def test_handle_stats_nesting_tolerates_missing_drop_columns():
406406
assert df["monitoring_location_id"].iloc[0] == "USGS-12345"
407407

408408

409-
def _warning_messages(caplog):
410-
"""Pull WARNING-level message strings out of caplog."""
411-
return [r.getMessage() for r in caplog.records if r.levelno == logging.WARNING]
412-
413-
414-
@pytest.fixture(autouse=True)
415-
def _reset_geopandas_warned(monkeypatch):
416-
"""The geopandas advisory is latched once per process; reset it so these
417-
warning tests don't depend on order."""
418-
monkeypatch.setattr(_utils_module, "_geopandas_warned", False)
419-
420-
421-
def test_warn_geopandas_once_is_latched(monkeypatch, caplog):
422-
# Static environment fact -> warn at most once per process.
423-
monkeypatch.setattr(_utils_module, "GEOPANDAS", False)
424-
caplog.set_level(logging.WARNING, logger=_LOGGER_NAME)
425-
_utils_module._warn_geopandas_once()
426-
_utils_module._warn_geopandas_once() # second call must not re-warn
427-
geo = [m for m in _warning_messages(caplog) if "Geopandas not installed" in m]
428-
assert len(geo) == 1
429-
430-
431-
def test_warn_geopandas_once_silent_when_installed(monkeypatch, caplog):
432-
monkeypatch.setattr(_utils_module, "GEOPANDAS", True)
433-
caplog.set_level(logging.WARNING, logger=_LOGGER_NAME)
434-
_utils_module._warn_geopandas_once()
435-
assert not [m for m in _warning_messages(caplog) if "Geopandas not installed" in m]
436-
437-
438-
def test_get_stats_data_warns_once_when_geopandas_missing(caplog, monkeypatch):
439-
# The advisory fires once per process (via _warn_geopandas_once) before the
440-
# progress line — not once per paginated page, and not interleaved with it.
441-
from dataretrieval.waterdata.utils import get_stats_data
442-
443-
monkeypatch.setattr(_utils_module, "GEOPANDAS", False)
444-
monkeypatch.setattr(
445-
_utils_module,
446-
"_handle_stats_nesting",
447-
mock.MagicMock(return_value=pd.DataFrame()),
409+
def test_geopandas_advisory_emitted_once_on_import():
410+
# The advisory is a one-time module-import side effect, not per call. Import
411+
# the module in a subprocess with geopandas blocked and confirm exactly one
412+
# warning reaches stderr.
413+
import subprocess
414+
import sys
415+
416+
code = (
417+
"import sys; sys.modules['geopandas'] = None\n"
418+
"import dataretrieval.waterdata.utils\n"
448419
)
449-
caplog.set_level(logging.WARNING, logger=_LOGGER_NAME)
450-
451-
page1 = mock.MagicMock(status_code=200, headers={})
452-
page1.json.return_value = {"next": "tok", "features": []}
453-
page2 = mock.MagicMock(status_code=200, headers={})
454-
page2.json.return_value = {"next": None, "features": []}
455-
456-
client = mock.MagicMock(spec=requests.Session)
457-
client.send.return_value = page1
458-
client.request.return_value = page2
459-
460-
get_stats_data(
461-
args={"monitoring_location_id": "USGS-1"},
462-
service="observationNormals",
463-
expand_percentiles=False,
464-
client=client,
420+
result = subprocess.run(
421+
[sys.executable, "-c", code], capture_output=True, text=True
465422
)
466-
467-
geo = [m for m in _warning_messages(caplog) if "Geopandas not installed" in m]
468-
assert len(geo) == 1
423+
assert result.returncode == 0, result.stderr
424+
assert result.stderr.count("Geopandas not installed") == 1
469425

470426

471427
def test_handle_stats_nesting_returns_empty_on_empty_features():

0 commit comments

Comments
 (0)