Skip to content

Commit 378f07e

Browse files
tvaron3Copilot
andcommitted
perf(cosmos): optimize partition key range cache memory usage
Three optimizations to reduce CollectionRoutingMap memory footprint: 1. Convert to compact PKRange namedtuples: Strip 9 unused fields from service response during _discard_parent_ranges (single pass, no extra iteration). PKRange supports dict-style access for backward compat. 2. Add __slots__ to Range class: Eliminates per-instance __dict__ overhead (~100 bytes per Range object, measured at 64 bytes with slots). 3. Skip redundant .upper() calls: Check if string is already uppercase before creating a copy. Service returns uppercase hex. PPCB overhead reduction (tracemalloc, 150 clients): | Clients | Original | Patched | Reduction | |---------|----------|---------|-----------| | 25 | 5.1 MB | 2.1 MB | -58% | | 50 | 10.3 MB | 4.1 MB | -60% | | 100 | 15.4 MB | 7.2 MB | -53% | | 150 | 27.4 MB | 6.9 MB | -74% | Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent de23b45 commit 378f07e

4 files changed

Lines changed: 135 additions & 8 deletions

File tree

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ... import _base
2929
from ..collection_routing_map import CollectionRoutingMap
3030
from .. import routing_range
31+
from ..routing_range import PKRange
3132

3233
_LOGGER = logging.getLogger(__name__)
3334

@@ -124,10 +125,18 @@ async def get_range_by_partition_key_range_id(
124125
def _discard_parent_ranges(partitionKeyRanges):
125126
parentIds = set()
126127
for r in partitionKeyRanges:
127-
if isinstance(r, dict) and routing_range.PartitionKeyRange.Parents in r:
128-
for parentId in r[routing_range.PartitionKeyRange.Parents]:
128+
parents = r.get(routing_range.PartitionKeyRange.Parents)
129+
if parents:
130+
for parentId in parents:
129131
parentIds.add(parentId)
130-
return (r for r in partitionKeyRanges if r[routing_range.PartitionKeyRange.Id] not in parentIds)
132+
return (
133+
PKRange(id=r[routing_range.PartitionKeyRange.Id],
134+
minInclusive=r[routing_range.PartitionKeyRange.MinInclusive],
135+
maxExclusive=r[routing_range.PartitionKeyRange.MaxExclusive],
136+
parents=r.get(routing_range.PartitionKeyRange.Parents))
137+
for r in partitionKeyRanges
138+
if r[routing_range.PartitionKeyRange.Id] not in parentIds
139+
)
131140

132141

133142
def _second_range_is_after_first_range(range1, range2):

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_map_provider.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from .. import _base
2929
from .collection_routing_map import CollectionRoutingMap
3030
from . import routing_range
31+
from .routing_range import PKRange
3132
from .routing_range import PartitionKeyRange
3233

3334
_LOGGER = logging.getLogger(__name__)
@@ -125,10 +126,18 @@ def get_range_by_partition_key_range_id(
125126
def _discard_parent_ranges(partitionKeyRanges):
126127
parentIds = set()
127128
for r in partitionKeyRanges:
128-
if isinstance(r, dict) and PartitionKeyRange.Parents in r:
129-
for parentId in r[PartitionKeyRange.Parents]:
129+
parents = r.get(PartitionKeyRange.Parents)
130+
if parents:
131+
for parentId in parents:
130132
parentIds.add(parentId)
131-
return (r for r in partitionKeyRanges if r[PartitionKeyRange.Id] not in parentIds)
133+
return (
134+
PKRange(id=r[PartitionKeyRange.Id],
135+
minInclusive=r[PartitionKeyRange.MinInclusive],
136+
maxExclusive=r[PartitionKeyRange.MaxExclusive],
137+
parents=r.get(PartitionKeyRange.Parents))
138+
for r in partitionKeyRanges
139+
if r[PartitionKeyRange.Id] not in parentIds
140+
)
132141

133142

134143
def _second_range_is_after_first_range(range1, range2):

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/routing_range.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,31 @@
2727
import json
2828

2929

30+
from collections import namedtuple
31+
32+
_PKRangeBase = namedtuple('PKRange', ['id', 'minInclusive', 'maxExclusive', 'parents'])
33+
34+
35+
class PKRange(_PKRangeBase):
36+
"""Compact partition key range with dict-compatible access."""
37+
__slots__ = ()
38+
39+
def __getitem__(self, key):
40+
try:
41+
return getattr(self, key)
42+
except AttributeError:
43+
raise KeyError(key)
44+
45+
def get(self, key, default=None):
46+
return getattr(self, key, default)
47+
48+
def __contains__(self, key):
49+
return key in self._fields
50+
51+
def items(self):
52+
return zip(self._fields, self)
53+
54+
3055
class PartitionKeyRange(object):
3156
"""Partition Key Range Constants"""
3257

@@ -39,6 +64,8 @@ class PartitionKeyRange(object):
3964
class Range(object):
4065
"""description of class"""
4166

67+
__slots__ = ('min', 'max', 'isMinInclusive', 'isMaxInclusive')
68+
4269
MinPath = "min"
4370
MaxPath = "max"
4471
IsMinInclusivePath = "isMinInclusive"
@@ -50,8 +77,8 @@ def __init__(self, range_min, range_max, isMinInclusive, isMaxInclusive):
5077
if range_max is None:
5178
raise ValueError("max is missing")
5279

53-
self.min = range_min.upper()
54-
self.max = range_max.upper()
80+
self.min = range_min if range_min == range_min.upper() else range_min.upper()
81+
self.max = range_max if range_max == range_max.upper() else range_max.upper()
5582
self.isMinInclusive = isMinInclusive
5683
self.isMaxInclusive = isMaxInclusive
5784

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# The MIT License (MIT)
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
4+
import unittest
5+
6+
import pytest
7+
8+
from azure.cosmos._routing.routing_range import Range, PKRange, PartitionKeyRange
9+
from azure.cosmos._routing.collection_routing_map import CollectionRoutingMap
10+
from azure.cosmos._routing.routing_map_provider import PartitionKeyRangeCache
11+
12+
13+
@pytest.mark.cosmosEmulator
14+
class TestPKRangeOptimizations(unittest.TestCase):
15+
"""Tests for partition key range memory optimizations."""
16+
17+
def test_pkrange_dict_access(self):
18+
"""PKRange supports dict-style [key] access for backward compatibility."""
19+
pkr = PKRange(id="1", minInclusive="00", maxExclusive="FF", parents=["0"])
20+
self.assertEqual(pkr["id"], "1")
21+
self.assertEqual(pkr["minInclusive"], "00")
22+
self.assertEqual(pkr["maxExclusive"], "FF")
23+
self.assertEqual(pkr["parents"], ["0"])
24+
25+
def test_pkrange_get_method(self):
26+
"""PKRange.get() works like dict.get() with defaults."""
27+
pkr = PKRange(id="1", minInclusive="00", maxExclusive="FF", parents=None)
28+
self.assertEqual(pkr.get("id"), "1")
29+
self.assertIsNone(pkr.get("parents"))
30+
self.assertEqual(pkr.get("nonexistent", "default"), "default")
31+
32+
def test_pkrange_contains(self):
33+
"""PKRange supports 'in' operator for field checking."""
34+
pkr = PKRange(id="1", minInclusive="00", maxExclusive="FF", parents=["0"])
35+
self.assertIn("id", pkr)
36+
self.assertIn("parents", pkr)
37+
self.assertNotIn("_rid", pkr)
38+
39+
def test_pkrange_in_collection_routing_map(self):
40+
"""CollectionRoutingMap works with PKRange namedtuples instead of dicts."""
41+
pk_ranges = [
42+
PKRange(id="0", minInclusive="", maxExclusive="05C1C9CD673398", parents=None),
43+
PKRange(id="1", minInclusive="05C1C9CD673398", maxExclusive="05C1D9CD673398", parents=None),
44+
PKRange(id="2", minInclusive="05C1D9CD673398", maxExclusive="FF", parents=None),
45+
]
46+
crm = CollectionRoutingMap.CompleteRoutingMap(
47+
[(r, True) for r in pk_ranges], "test-collection"
48+
)
49+
self.assertIsNotNone(crm)
50+
51+
# Test get_overlapping_ranges
52+
full_range = Range("", "FF", True, False)
53+
overlapping = crm.get_overlapping_ranges(full_range)
54+
self.assertEqual(len(overlapping), 3)
55+
56+
# Test get_range_by_effective_partition_key
57+
result = crm.get_range_by_effective_partition_key("")
58+
self.assertEqual(result["id"], "0")
59+
60+
# Test get_range_by_partition_key_range_id
61+
result = crm.get_range_by_partition_key_range_id("1")
62+
self.assertEqual(result["id"], "1")
63+
64+
def test_discard_parent_ranges_returns_pkrange(self):
65+
"""_discard_parent_ranges returns PKRange namedtuples, filtering parents."""
66+
raw_ranges = [
67+
{"id": "0", "minInclusive": "", "maxExclusive": "FF", "parents": []},
68+
{"id": "1", "minInclusive": "", "maxExclusive": "80", "parents": ["0"]},
69+
{"id": "2", "minInclusive": "80", "maxExclusive": "FF", "parents": ["0"]},
70+
]
71+
result = list(PartitionKeyRangeCache._discard_parent_ranges(raw_ranges))
72+
# Parent "0" should be discarded, only children "1" and "2" remain
73+
self.assertEqual(len(result), 2)
74+
self.assertEqual(result[0].id, "1")
75+
self.assertEqual(result[1].id, "2")
76+
# Verify they are PKRange instances
77+
self.assertIsInstance(result[0], PKRange)
78+
self.assertIsInstance(result[1], PKRange)
79+
80+
81+
if __name__ == "__main__":
82+
unittest.main()

0 commit comments

Comments
 (0)