|
81 | 81 | # leaves ~200 bytes for request-line framing and proxy variance. |
82 | 82 | _WATERDATA_URL_BYTE_LIMIT = 8000 |
83 | 83 |
|
84 | | -# Default rule: any list-shaped kwarg with >1 element is chunked across |
85 | | -# sub-requests — each chunk becomes a comma-joined sub-list in the URL. |
86 | | -# The OGC getters expose ~90 such list-shaped params (IDs, codes, |
87 | | -# statuses, ...), all chunkable, so it's shorter to enumerate the |
88 | | -# exceptions than to maintain an allowlist that grows with the API. |
89 | | -# Exceptions, by reason: |
90 | | -# - response shape: ``properties`` defines the columns; sharding |
91 | | -# would yield different schemas per chunk. |
92 | | -# - structured: ``bbox`` is a fixed 4-element coord tuple. |
93 | | -# - intervals: date/time ranges are not enumerable sets. |
94 | | -# - handled elsewhere: ``filter`` becomes its own axis in |
95 | | -# ``_extract_axes`` (joiner ``" OR "``); |
96 | | -# comma-joining CQL clauses would emit |
97 | | -# malformed expressions. |
98 | | -# - scalar by contract: ``limit``, ``skip_geometry``, ``filter_lang`` |
99 | | -# — a list value would be a type-erasure smuggle. |
| 84 | +# Any list-shaped kwarg with >1 element is chunked (comma-joined per |
| 85 | +# sub-list in the URL); ~90 OGC params qualify, so we denylist the few |
| 86 | +# exceptions rather than maintain a growing allowlist. Excluded because: |
| 87 | +# ``properties`` defines the column schema; ``bbox`` is a fixed coord |
| 88 | +# tuple; date/time params are intervals, not enumerable sets; ``filter`` |
| 89 | +# is handled as its own OR-axis in ``_extract_axes``; and ``limit`` / |
| 90 | +# ``skip_geometry`` / ``filter_lang`` are scalar by contract. |
100 | 91 | _NEVER_CHUNK = frozenset( |
101 | 92 | { |
102 | 93 | "properties", |
|
118 | 109 | # Response header USGS uses to advertise remaining hourly quota. |
119 | 110 | _QUOTA_HEADER = "x-ratelimit-remaining" |
120 | 111 |
|
121 | | -# Environment variable that controls fan-out concurrency. Read at call |
122 | | -# time (not import) so test patches via ``monkeypatch.setenv`` take |
123 | | -# effect. The default (16) is the server-friendly sweet spot: higher |
124 | | -# values trip the upstream into 5xx burst-protection in practice. Set to |
125 | | -# ``1`` for a single connection, set to ``unbounded`` for no per-call cap |
126 | | -# (use sparingly — you own the upstream-burst risk). |
| 112 | +# Fan-out concurrency cap, read at call time (not import) so test |
| 113 | +# ``monkeypatch.setenv`` applies. Value grammar in :func:`_read_concurrency_env`; |
| 114 | +# the concurrency model is in the module docstring. |
127 | 115 | _CONCURRENCY_ENV = "API_USGS_CONCURRENT" |
128 | 116 | _CONCURRENCY_DEFAULT = 16 |
129 | 117 | _CONCURRENCY_UNBOUNDED = "unbounded" |
@@ -564,13 +552,10 @@ def __init__( |
564 | 552 | self.total_chunks = total_chunks |
565 | 553 | self.call = call |
566 | 554 | self.retry_after = retry_after |
567 | | - # Snapshot partial state at raise time so the exception's view |
568 | | - # stays stable across later ``call.resume()`` advances; the |
569 | | - # live view lives on ``call.partial_frame``/``.partial_response``. |
570 | | - # ``partial_frame`` gets a defensive ``.copy()`` because |
571 | | - # ``_combine_chunk_frames`` may return a chunk frame verbatim |
572 | | - # in the single-completed-chunk fast path; ``partial_response`` |
573 | | - # already comes via ``copy.copy`` from ``_combine_chunk_responses``. |
| 555 | + # Snapshot partial state at raise time so the exception's view stays |
| 556 | + # stable across later ``call.resume()`` advances (the live view is on |
| 557 | + # ``call.partial_frame`` / ``.partial_response``). ``.copy()`` guards |
| 558 | + # the single-chunk fast path, where the frame may be returned verbatim. |
574 | 559 | if call is None: |
575 | 560 | self.partial_frame: pd.DataFrame = pd.DataFrame() |
576 | 561 | self.partial_response: httpx.Response | None = None |
@@ -1485,14 +1470,7 @@ def wrap_failure(self, exc: BaseException) -> ChunkInterrupted | None: |
1485 | 1470 |
|
1486 | 1471 | @property |
1487 | 1472 | def completed_chunks(self) -> int: |
1488 | | - """ |
1489 | | - Number of sub-requests completed so far. |
1490 | | -
|
1491 | | - Returns |
1492 | | - ------- |
1493 | | - int |
1494 | | - The count of completed sub-requests. |
1495 | | - """ |
| 1473 | + """Number of sub-requests completed so far.""" |
1496 | 1474 | return len(self._chunks) |
1497 | 1475 |
|
1498 | 1476 | def _combine_raw(self) -> tuple[pd.DataFrame, httpx.Response]: |
@@ -1760,21 +1738,13 @@ def multi_value_chunked( |
1760 | 1738 | """ |
1761 | 1739 | Decorate an async fetcher to transparently chunk over-budget requests. |
1762 | 1740 |
|
1763 | | - Splits multi-value list params and cql-text filters across |
1764 | | - sub-requests so each fits the URL byte limit. Builds a |
1765 | | - :class:`ChunkPlan` and runs it: passthrough requests are a trivial |
1766 | | - single-step plan, so the decorated function has one code path |
1767 | | - either way. |
1768 | | -
|
1769 | | - Decorates an ``async def fetch(args) -> (df, response)`` and returns a |
1770 | | - callable that builds the :class:`ChunkPlan`, constructs a |
1771 | | - :class:`ChunkedCall` over the fetcher, and drives it to completion via |
1772 | | - :meth:`ChunkedCall.resume` (an ``anyio`` worker-thread portal, so it |
1773 | | - works whether or not the caller is already inside an event loop — |
1774 | | - Jupyter / IPython / async apps). Every pending sub-request is gathered |
1775 | | - under one :class:`httpx.AsyncClient`; concurrency is bounded purely by |
1776 | | - the connection pool, sized from ``API_USGS_CONCURRENT`` (``1`` is a |
1777 | | - single-connection gather, ``plan.total <= 1`` a one-element gather). |
| 1741 | + Returns a callable that builds a :class:`ChunkPlan` from ``args``, |
| 1742 | + constructs a :class:`ChunkedCall` over the decorated |
| 1743 | + ``async def fetch(args) -> (df, response)``, and drives it to |
| 1744 | + completion via :meth:`ChunkedCall.resume`. The plan splits multi-value |
| 1745 | + list params and the cql-text filter so each sub-request URL fits the |
| 1746 | + byte limit; an already-fitting request is a one-step plan. See the |
| 1747 | + module docstring for the concurrency model. |
1778 | 1748 |
|
1779 | 1749 | Parameters |
1780 | 1750 | ---------- |
|
0 commit comments