Skip to content

Commit fd62436

Browse files
committed
addressing sdk-agent comments
1 parent e4c4b39 commit fd62436

8 files changed

Lines changed: 776 additions & 229 deletions

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#### Bugs Fixed
1010
* Fixed bug where `CosmosClient` construction with AAD credentials would crash at startup if the semantic reranking inference endpoint environment variable was not set, even when semantic reranking was not being used. The inference service is now lazily initialized on first use. See [PR 46243](https://github.com/Azure/azure-sdk-for-python/pull/46243)
11+
* Fixed a bug in `query_items(feed_range=...)` where pagination could return incorrect results after a partition split caused the supplied feed range to overlap multiple physical partitions. See [PR 46506](https://github.com/Azure/azure-sdk-for-python/pull/46506)
1112

1213
#### Other Changes
1314

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py

Lines changed: 132 additions & 112 deletions
Large diffs are not rendered by default.

sdk/cosmos/azure-cosmos/azure/cosmos/_query_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def build_parameterized_query_for_items(
155155
parameters.append({"name": id_param_name, "value": item_id})
156156
condition_parts = [f"c.id = {id_param_name}"]
157157

158-
pk_values = []
158+
pk_values: list = []
159159
if partition_key_value is not None and not isinstance(partition_key_value, type(NonePartitionKeyValue)):
160160
pk_values = partition_key_value if isinstance(partition_key_value, list) else [partition_key_value]
161161
if len(pk_values) != len(partition_key_paths):

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/feed_range_continuation.py

Lines changed: 351 additions & 26 deletions
Large diffs are not rendered by default.

sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py

Lines changed: 119 additions & 90 deletions
Large diffs are not rendered by default.

sdk/cosmos/azure-cosmos/tests/test_feed_range_continuation_token.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,21 @@
2828

2929
import pytest
3030

31+
from azure.cosmos import http_constants
3132
from azure.cosmos._routing import routing_range
3233
from azure.cosmos._routing.feed_range_continuation import (
34+
_MAX_CONSECUTIVE_NO_PROGRESS_PAGES,
35+
_apply_feedrange_request_headers,
3336
_build_outbound_token,
3437
_build_scope_from_overlaps,
38+
_count_page_items_from_partial_result,
3539
_decode_token,
3640
_derive_initial_feedranges,
3741
_encode_token,
3842
_hash_feed_range,
3943
_hash_query_spec,
4044
_normalize_max_item_count,
45+
_update_no_progress_page_count,
4146
_validate_token_identity,
4247
_FIELD_BACKEND_CONTINUATION,
4348
_FIELD_COLLECTION_RID,
@@ -52,6 +57,7 @@
5257

5358
# Fixed inputs reused across the round-trip / mismatch tests so each
5459
# assertion compares against a known-good baseline.
60+
# cspell:ignore AOXB BFFFFFFFFFFFFFFF BAAAAAAAAAA
5561
_RID = "Yxs1AOXBSp4="
5662
_QUERY = {"query": "SELECT * FROM c WHERE c.x = @x",
5763
"parameters": [{"name": "@x", "value": 7}]}
@@ -76,6 +82,10 @@
7682
_BACKEND_CONT = "+RID:~Yxs1AOXBSp4BAAAAAAAAAA==#RT:1#TRC:5#ISV:2#IEO:65567"
7783

7884

85+
def _mk_range(mn: str, mx: str) -> routing_range.Range:
86+
return routing_range.Range(range_min=mn, range_max=mx, isMinInclusive=True, isMaxInclusive=False)
87+
88+
7989
def _make_valid_token_payload() -> dict:
8090
"""Build a structurally-complete v=1 token payload over the fixtures."""
8191
return {
@@ -640,3 +650,163 @@ def test_non_numeric_is_treated_as_unbounded(self):
640650
def test_object_is_treated_as_unbounded(self):
641651
assert _normalize_max_item_count(object()) is None
642652

653+
654+
# ---------------------------------------------------------------------- #
655+
# Request-header shaping
656+
# ---------------------------------------------------------------------- #
657+
class TestApplyFeedrangeRequestHeaders:
658+
"""``_apply_feedrange_request_headers`` sets and clears routing/page/token
659+
headers correctly for both full-partition and sub-range requests."""
660+
661+
@pytest.mark.parametrize(
662+
"current_feedrange,expect_epk_headers",
663+
[
664+
# full-partition request -> EPK headers must be cleared
665+
(_mk_range("10", "20"), False),
666+
# strict sub-range request -> EPK headers must be stamped
667+
(_mk_range("12", "18"), True),
668+
],
669+
)
670+
def test_epk_headers_match_full_vs_subrange(self, current_feedrange, expect_epk_headers):
671+
req_headers = {
672+
# pre-populate with stale values to prove clear behavior
673+
http_constants.HttpHeaders.StartEpkString: "stale-start",
674+
http_constants.HttpHeaders.EndEpkString: "stale-end",
675+
}
676+
overlapping = [{"id": "7", "minInclusive": "10", "maxExclusive": "20"}]
677+
partition_scope = _mk_range("10", "20")
678+
679+
_apply_feedrange_request_headers(
680+
req_headers=req_headers,
681+
overlapping=overlapping,
682+
partition_scope=partition_scope,
683+
current_feedrange=current_feedrange,
684+
remaining_page_item_count=None,
685+
inbound_continuation=None,
686+
)
687+
688+
assert req_headers[http_constants.HttpHeaders.PartitionKeyRangeID] == "7"
689+
assert req_headers[http_constants.HttpHeaders.ReadFeedKeyType] == "EffectivePartitionKeyRange"
690+
if expect_epk_headers:
691+
assert req_headers[http_constants.HttpHeaders.StartEpkString] == current_feedrange.min
692+
assert req_headers[http_constants.HttpHeaders.EndEpkString] == current_feedrange.max
693+
else:
694+
assert http_constants.HttpHeaders.StartEpkString not in req_headers
695+
assert http_constants.HttpHeaders.EndEpkString not in req_headers
696+
697+
@pytest.mark.parametrize(
698+
"remaining_page_item_count,inbound_continuation,expect_page_size,expect_continuation",
699+
[
700+
(5, "abc", True, True),
701+
(None, "abc", False, True),
702+
(5, None, True, False),
703+
(None, None, False, False),
704+
],
705+
)
706+
def test_page_size_and_continuation_are_set_or_cleared(
707+
self,
708+
remaining_page_item_count,
709+
inbound_continuation,
710+
expect_page_size,
711+
expect_continuation,
712+
):
713+
req_headers = {
714+
# pre-populate stale values; helper should clear when args are None
715+
http_constants.HttpHeaders.PageSize: "999",
716+
http_constants.HttpHeaders.Continuation: "stale-cont",
717+
}
718+
overlapping = [{"id": "9", "minInclusive": "30", "maxExclusive": "40"}]
719+
partition_scope = _mk_range("30", "40")
720+
current_feedrange = _mk_range("30", "40")
721+
722+
_apply_feedrange_request_headers(
723+
req_headers=req_headers,
724+
overlapping=overlapping,
725+
partition_scope=partition_scope,
726+
current_feedrange=current_feedrange,
727+
remaining_page_item_count=remaining_page_item_count,
728+
inbound_continuation=inbound_continuation,
729+
)
730+
731+
if expect_page_size:
732+
assert req_headers[http_constants.HttpHeaders.PageSize] == str(remaining_page_item_count)
733+
else:
734+
assert http_constants.HttpHeaders.PageSize not in req_headers
735+
736+
if expect_continuation:
737+
assert req_headers[http_constants.HttpHeaders.Continuation] == inbound_continuation
738+
else:
739+
assert http_constants.HttpHeaders.Continuation not in req_headers
740+
741+
742+
class TestBudgetCounting:
743+
"""Budget accounting treats aggregate partial rows as merge fragments."""
744+
745+
def test_standard_documents_consume_budget(self):
746+
partial_result = {"Documents": [{"id": "1"}, {"id": "2"}]}
747+
assert _count_page_items_from_partial_result(partial_result, "SELECT * FROM c") == 2
748+
749+
def test_object_aggregate_partial_does_not_consume_budget(self):
750+
partial_result = {"Documents": [{"_aggregate": {"count": 7}}]}
751+
assert _count_page_items_from_partial_result(partial_result, "SELECT COUNT(1) FROM c") == 0
752+
753+
def test_value_aggregate_partial_does_not_consume_budget(self):
754+
partial_result = {"Documents": [7]}
755+
assert _count_page_items_from_partial_result(partial_result, "SELECT VALUE COUNT(1) FROM c") == 0
756+
757+
def test_value_non_aggregate_numeric_row_consumes_budget(self):
758+
partial_result = {"Documents": [7]}
759+
assert _count_page_items_from_partial_result(partial_result, "SELECT VALUE c.value FROM c") == 1
760+
761+
def test_value_non_aggregate_boolean_row_consumes_budget(self):
762+
partial_result = {"Documents": [True]}
763+
assert _count_page_items_from_partial_result(partial_result, "SELECT VALUE c.flag FROM c") == 1
764+
765+
766+
class TestEmptyPageStallCounter:
767+
"""No-progress guard only counts empty pages that still carry continuation."""
768+
769+
def test_increments_on_empty_page_with_continuation(self):
770+
current_feedrange = _mk_range("10", "20")
771+
assert _update_no_progress_page_count(
772+
3,
773+
page_items_returned=0,
774+
previous_feedrange=current_feedrange,
775+
previous_backend_continuation="token",
776+
current_feedrange=current_feedrange,
777+
current_backend_continuation="token",
778+
) == 4
779+
780+
def test_resets_when_items_are_returned(self):
781+
current_feedrange = _mk_range("10", "20")
782+
assert _update_no_progress_page_count(
783+
5,
784+
page_items_returned=1,
785+
previous_feedrange=current_feedrange,
786+
previous_backend_continuation="token",
787+
current_feedrange=current_feedrange,
788+
current_backend_continuation="token",
789+
) == 0
790+
791+
def test_resets_when_continuation_is_none(self):
792+
assert _update_no_progress_page_count(
793+
_MAX_CONSECUTIVE_NO_PROGRESS_PAGES - 1,
794+
page_items_returned=0,
795+
previous_feedrange=_mk_range("10", "20"),
796+
previous_backend_continuation="token",
797+
current_feedrange=_mk_range("20", "30"),
798+
current_backend_continuation=None,
799+
) == 0
800+
801+
def test_resets_when_continuation_advances(self):
802+
current_feedrange = _mk_range("10", "20")
803+
assert _update_no_progress_page_count(
804+
8,
805+
page_items_returned=0,
806+
previous_feedrange=current_feedrange,
807+
previous_backend_continuation="token-1",
808+
current_feedrange=current_feedrange,
809+
current_backend_continuation="token-2",
810+
) == 0
811+
812+

sdk/cosmos/azure-cosmos/tests/test_query_feed_range_multipartition.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,7 @@ def test_legacy_opaque_token_compat(self):
511511
# continuation string that is NOT base64-of-our-JSON. The new
512512
# _decode_token must return None for this and the call site must
513513
# restart from offset 0.
514+
# cspell:ignore AOXB BAAAAAAAAAA EAAAAFAAAA
514515
legacy_token = "+RID:~Yxs1AOXBSp4BAAAAAAAAAA==#RT:1#TRC:5#ISV:2#IEO:65567#FPC:AgEAAAAFAAAA"
515516

516517
pager = container.query_items(

sdk/cosmos/azure-cosmos/tests/test_query_feed_range_multipartition_async.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ async def test_legacy_opaque_token_compat_async(self):
387387
ground_truth = await _ids_via_per_partition_scan(
388388
container, [chosen[0], chosen[1]])
389389

390+
# cspell:ignore AOXB BAAAAAAAAAA EAAAAFAAAA
390391
legacy_token = "+RID:~Yxs1AOXBSp4BAAAAAAAAAA==#RT:1#TRC:5#ISV:2#IEO:65567#FPC:AgEAAAAFAAAA"
391392

392393
pager = container.query_items(

0 commit comments

Comments
 (0)