|
51 | 51 | _split_top_level_or, |
52 | 52 | ) |
53 | 53 |
|
54 | | -# Params the multi-value chunker must NOT split. ``properties`` defines |
55 | | -# the response schema; chunking it would shard columns. ``bbox`` is a |
56 | | -# fixed 4-element coord tuple. Date params are intervals not sets. |
57 | | -# ``filter`` has its own OR-clause partitioning inside ``_plan_joint``; |
58 | | -# treating it as a multi-value list would emit malformed CQL. |
59 | | -# ``filter_lang`` is a scalar string. ``limit`` and ``skip_geometry`` |
60 | | -# are scalars by contract (smuggling a list through type erasure would |
61 | | -# silently fan into queries with conflicting per-request semantics). |
| 54 | +# Params the multi-value chunker must NOT comma-join across sub-requests. |
| 55 | +# Categorized to make additions intentional: |
| 56 | +# - response shape: ``properties`` defines columns; sharding would |
| 57 | +# give different columns per chunk. |
| 58 | +# - structured: ``bbox`` is a fixed 4-element coord tuple. |
| 59 | +# - intervals: date/time ranges are not enumerable sets. |
| 60 | +# - handled elsewhere: ``filter`` gets OR-clause partitioning in |
| 61 | +# ``_plan_joint``; multi-value semantics would |
| 62 | +# emit malformed CQL. |
| 63 | +# - scalar by contract: ``limit``, ``skip_geometry``, ``filter_lang``. |
62 | 64 | _NEVER_CHUNK = frozenset( |
63 | 65 | { |
64 | 66 | "properties", |
@@ -213,6 +215,19 @@ def _plan_total(list_plan: dict[str, list[list[Any]]], n_filter_chunks: int) -> |
213 | 215 | return list_count * n_filter_chunks |
214 | 216 |
|
215 | 217 |
|
| 218 | +def _resolve_max_chunks(max_chunks: int | None) -> int: |
| 219 | + """Apply ``max_chunks`` default + validation. Both planner entry |
| 220 | + points share this rule, so pinning it here keeps them in sync.""" |
| 221 | + if max_chunks is None: |
| 222 | + return _DEFAULT_MAX_CHUNKS |
| 223 | + if max_chunks < 1: |
| 224 | + raise ValueError( |
| 225 | + f"max_chunks must be >= 1; got {max_chunks}. Zero or negative " |
| 226 | + f"values would silently bypass the cap." |
| 227 | + ) |
| 228 | + return max_chunks |
| 229 | + |
| 230 | + |
216 | 231 | def _worst_case_args( |
217 | 232 | base_args: dict[str, Any], list_plan: dict[str, list[list[Any]]] |
218 | 233 | ) -> dict[str, Any]: |
@@ -263,13 +278,7 @@ def _plan_list_chunks( |
263 | 278 | when the smallest reducible plan still doesn't fit or when the |
264 | 279 | plan would exceed ``max_chunks`` sub-requests. |
265 | 280 | """ |
266 | | - if max_chunks is None: |
267 | | - max_chunks = _DEFAULT_MAX_CHUNKS |
268 | | - if max_chunks < 1: |
269 | | - raise ValueError( |
270 | | - f"max_chunks must be >= 1; got {max_chunks}. Zero or negative " |
271 | | - f"values would silently bypass the cap." |
272 | | - ) |
| 281 | + max_chunks = _resolve_max_chunks(max_chunks) |
273 | 282 | chunkable = _chunkable_params(args) |
274 | 283 | if not chunkable: |
275 | 284 | return None |
@@ -381,13 +390,7 @@ def _plan_joint( |
381 | 390 | Raises ``RequestTooLarge`` when no candidate fits or the best plan |
382 | 391 | would exceed ``max_chunks``. |
383 | 392 | """ |
384 | | - if max_chunks is None: |
385 | | - max_chunks = _DEFAULT_MAX_CHUNKS |
386 | | - if max_chunks < 1: |
387 | | - raise ValueError( |
388 | | - f"max_chunks must be >= 1; got {max_chunks}. Zero or negative " |
389 | | - f"values would silently bypass the cap." |
390 | | - ) |
| 393 | + max_chunks = _resolve_max_chunks(max_chunks) |
391 | 394 |
|
392 | 395 | filter_expr = args.get("filter") |
393 | 396 | clauses: list[str] = [] |
@@ -455,14 +458,11 @@ def _iter_sub_args( |
455 | 458 | filter_chunks: list[str | None], |
456 | 459 | ) -> Iterator[dict[str, Any]]: |
457 | 460 | """Yield the substituted ``args`` for each sub-request in the joint |
458 | | - plan, in deterministic order: list-dim cartesian product (insertion |
459 | | - order, Python 3.7+ guarantee) crossed with filter chunks.""" |
460 | | - list_keys = list(list_plan) |
461 | | - list_combos = ( |
462 | | - itertools.product(*(list_plan[k] for k in list_keys)) if list_plan else [()] |
463 | | - ) |
| 461 | + plan, in deterministic order: list-dim cartesian product (dict |
| 462 | + insertion order — Python 3.7+ guarantee) crossed with filter chunks.""" |
| 463 | + list_combos = itertools.product(*list_plan.values()) if list_plan else [()] |
464 | 464 | for combo in list_combos: |
465 | | - base = {**args, **dict(zip(list_keys, combo))} |
| 465 | + base = {**args, **dict(zip(list_plan, combo))} |
466 | 466 | for filter_chunk in filter_chunks: |
467 | 467 | yield base if filter_chunk is None else {**base, "filter": filter_chunk} |
468 | 468 |
|
@@ -518,14 +518,12 @@ def wrapper( |
518 | 518 | args: dict[str, Any], |
519 | 519 | ) -> tuple[pd.DataFrame, requests.Response]: |
520 | 520 | limit = ( |
521 | | - url_limit |
522 | | - if url_limit is not None |
523 | | - else filters._WATERDATA_URL_BYTE_LIMIT |
| 521 | + filters._WATERDATA_URL_BYTE_LIMIT if url_limit is None else url_limit |
524 | 522 | ) |
525 | 523 | floor = ( |
526 | | - quota_safety_floor |
527 | | - if quota_safety_floor is not None |
528 | | - else _DEFAULT_QUOTA_SAFETY_FLOOR |
| 524 | + _DEFAULT_QUOTA_SAFETY_FLOOR |
| 525 | + if quota_safety_floor is None |
| 526 | + else quota_safety_floor |
529 | 527 | ) |
530 | 528 | plan = _plan_joint(args, build_request, limit, max_chunks) |
531 | 529 | if plan is None: |
|
0 commit comments