
Commit b01afa8

Authored by tvaron3, Copilot, and Tomas Varon
perf(cosmos): improve pkrange cache memory usage (#46297)
* perf(cosmos): share pk range cache + __slots__ + skip .upper()

  1. Share CollectionRoutingMap cache across clients per endpoint. Eliminates N-1 redundant copies when N clients target the same account.
  2. Add __slots__ to the Range class (64 bytes vs ~250 bytes per instance).
  3. Skip .upper() when the string is already uppercase.

  PPCB overhead (150 clients, tracemalloc): Original: 27.4 MB -> Patched: ~0 MB (-100%)
  At customer scale (200K partitions x 152 clients): ~2.1 GB -> ~14 MB

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* perf(cosmos): add PKRange namedtuple for compact partition key range storage

  Convert raw service response dicts to PKRange namedtuples in both the full-refresh (_build_routing_map_from_ranges) and incremental-update (process_fetched_ranges) paths. PKRange retains only 4 fields (id, minInclusive, maxExclusive, parents) and supports dict-style access for backward compatibility.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: resolve pylint, mypy, cspell errors in PKRange change

  - Import PKRange in _routing_map_provider_common.py (fixes all emulator tests)
  - Fix namedtuple name mismatch (_PKRangeBase, not PKRange) for mypy
  - Use the raise-from pattern in PKRange.__getitem__ (pylint W0707)
  - Move _locks_lock and _collection_locks init into __init__ (pylint W0201)
  - Add 'pkrange' to the cspell dictionary

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* perf(cosmos): add __slots__ to _PartitionHealthInfo + comments on Range __slots__

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: mypy type annotation + move cspell to cosmos package level

  - Widen the range_tuples type to List[Tuple[Any, Any]] for PKRange compatibility
  - Move the 'pkrange' word to sdk/cosmos/azure-cosmos/cspell.json (not .vscode)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* test(cosmos): add integration + fault injection tests for shared cache

  Integration tests (7):
  - Multi-client shared cache for reads and queries
  - clear_cache() transparent repopulation and cross-client propagation
  - Different endpoints isolated
  - PKRange full CRUD lifecycle and change feed compatibility

  Fault injection tests (6 sync + 6 async):
  - 410 Gone triggers cache refresh
  - Partition split (410/1002) refreshes the routing map
  - Concurrent cache refresh with ThreadPoolExecutor/asyncio.gather
  - PKRange immutability (namedtuple guarantee)
  - Transient 503 during PKRange fetch with retry recovery
  - clear_cache during concurrent reads (no crash/corruption)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): address review - clear_cache identity, PKRange indexing, parents tuple

  Fixes from coding agent harness review iteration 1:

  - F1: Fix the async else branch in refresh_routing_map_provider to use clear_cache() instead of re-creating SmartRoutingMapProvider
  - F2: Use dict.clear() in clear_cache() to preserve all client references (was creating a new dict, orphaning other clients' references)
  - F3: Clear _collection_locks under _locks_lock instead of replacing
  - F4: Align async clear_cache() with sync (both use .clear())
  - F5: PKRange.__getitem__ supports integer indexing (int/slice → super())
  - F6: Convert parents to a tuple at construction for true immutability
  - F8: Fix tests to verify dict identity is preserved after clear_cache
  - F9: Cache the .upper() result to avoid a double call in the slow path
  - F11: Add a changelog entry

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* chore: remove harness artifacts from tracked files

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): resolve test failures — PKRange dict equality, test updates

  - Add PKRange.__eq__ for dict comparison (existing tests compare against dicts)
  - Update partition split retry tests: assert clear_cache() instead of the SmartRoutingMapProvider constructor (sync + async)
  - Fix sync test .close() calls (sync CosmosClient uses a context manager, not .close())
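The PKRange idea described above (a compact namedtuple that still supports dict-style access, the raise-from pattern of W0707, and the int/slice indexing of F5) can be sketched as follows. This is an illustrative reconstruction from the commit text, not the SDK's exact implementation:

```python
from collections import namedtuple

# Compact base tuple: only the 4 routing-relevant fields survive caching.
_PKRangeBase = namedtuple("_PKRangeBase", ["id", "minInclusive", "maxExclusive", "parents"])


class PKRange(_PKRangeBase):
    __slots__ = ()  # no per-instance __dict__ on top of the tuple storage

    def __getitem__(self, key):
        # Integer/slice indexing falls through to normal tuple behaviour;
        # string keys emulate the legacy raw-dict access pattern.
        if isinstance(key, (int, slice)):
            return super().__getitem__(key)
        try:
            return getattr(self, key)
        except AttributeError as exc:
            raise KeyError(key) from exc  # raise-from keeps the cause chained


pkr = PKRange(id="0", minInclusive="", maxExclusive="FF", parents=())
print(pkr["id"], pkr[0], pkr.maxExclusive)  # dict-style, tuple, and attribute access
```

Callers written against the old raw service dicts (`pkr["id"]`) keep working, while each cached range drops from a full dict to a fixed-size tuple.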
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* chore: remove stale .temp artifact

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): session token parents.copy(), shared cache test isolation, container limits

  - Fix _session.py: parents.copy() -> list(parents) for tuple compatibility
  - Add url_connection + tearDown to routing_map_provider tests (cache isolation)
  - Use existing test containers instead of creating new ones (25-container limit)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* chore: remove .temp artifact

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): test fixes — PKRange field assertions, remove looping fault tests

  - test_routing_map.py: only check id/minInclusive/maxExclusive (of PKRange's 4 fields)
  - Remove fault injection tests that loop infinitely (FaultInjectionTransport resets its counter after max_inner_count, causing a retry → re-fault → retry loop)
  - Keep: concurrent cache refresh, PKRange immutability, concurrent reads tests
  - Remove tearDown cache clearing (conflicts with setUpClass client refs)
  - Fix the clear_cache repopulation test (don't assert empty between clear and read)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* test(cosmos): add async versions of all shared cache tests

  - test_shared_cache_integration_async.py: 7 async integration tests (multi-client reads/queries, clear_cache, endpoint isolation, CRUD, change feed)
  - test_shared_pk_range_cache_async.py: 5 async unit tests (cache sharing, isolation, clear_cache identity, cross-endpoint isolation)

  Total async test coverage: 7 integration + 5 unit + 3 fault injection = 15 async tests

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): async tests — drop enable_cross_partition_query, use query for cache population

  - async query_items() doesn't accept enable_cross_partition_query (TypeError in aiohttp)
  - async point reads don't populate the PK range cache; use a cross-partition query to deterministically populate it before/after clear_cache()

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): async tests — populate PK range cache via direct provider call

  Async query_items doesn't reliably populate _collection_routing_map_by_item the way sync cross-partition queries do. Add a _populate_cache() helper that calls provider.get_routing_map() directly to deterministically populate the cache for assertions.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): address iter-2 review — shared locks, cache release, PKRange semantics

  - Per-endpoint shared collection_locks dict and locks_lock, eliminating fragile per-instance lock state when multiple PartitionKeyRangeCache instances target the same endpoint (sync + async).
  - Add reference counting (release/__del__) on the shared cache entries and wire release() into CosmosClient.__exit__ and __aexit__ so the shared cache is evicted when the last client for an endpoint closes.
  - Make async PartitionKeyRangeCache.clear_cache an async coroutine that acquires the per-endpoint asyncio.Lock under the threading meta-lock; update the two await sites in _cosmos_client_connection_async and the affected tests (await + AsyncMock).
  - _resolve_endpoint falls back to id(client) when url_connection is unavailable (e.g. MagicMock test clients) so isolation is preserved.
  - PKRange.__contains__: return False for missing fields or empty tuples to avoid spurious membership matches against unset parents.
  - PKRange.__eq__ dict branch: include parents in equality, normalizing both sides to tuple to handle service raw dicts with list/missing parents.
  - Restore the parents assertion in test_routing_map_provider with tuple normalization on both sides.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): pylint docstrings on _resolve_endpoint + async CRUD test populate

  - Add :param/:returns/:rtype docstrings on the _resolve_endpoint helper (sync + async) to satisfy azure-pylint-guidelines-checker (C4739/41/42).
  - test_pkrange_survives_full_crud_lifecycle_async: drive a routing-aware query via _populate_cache before asserting the cache is populated. Async point reads/writes don't reliably populate _collection_routing_map_by_item the way sync does.

* chore: untrack .coding-harness/ harness artifacts

* ci: retrigger pipelines (flaky test_health_check_failure_startup_async on py39 dep-checks)

* doc(cosmos): document PKRange.__contains__ truthy-presence semantics

  Per iter-3 reviewer minor finding F3 — clarify that 'key in pkr' returns False for absent or empty fields so callers can use it as a single truthy presence check (matching the legacy raw-dict behaviour where the field was simply missing when empty).

* chore: untrack .coding-harness/ harness artifacts (proper gitignore)

* test(cosmos): bump test_timeout_for_read_items delay 2s → 3s

  The test reads items across multiple physical partitions through a transport that delays each request by N seconds, expecting the cumulative delay to exceed the 5s timeout. With the shared routing-map cache, the new delayed client inherits the routing map populated when the test container was created, eliminating one HTTP request from the timed path. With 2 physical partitions × 2s = 4s, the test no longer reaches the 5s timeout and the assertion fails on the circuit_breaker_MultiMaster job (which provisions exactly 2 partitions for offer_throughput=11000). Bumping the per-request delay to 3s (2 partitions × 3s = 6s) makes the test robust regardless of cache-warming state.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* chore(cosmos): address PR review comments

  - Remove unused imports across test files (patch, uuid, PartitionKey, PKRange, Range, sys, pytest_asyncio, FaultInjectionTransport, CosmosHttpResponseError, duplicate PartitionKeyRangeCache import).
  - Use CosmosClient as a context manager in tests so shared-cache refcounting is released deterministically instead of relying on GC (sync integration, sync fault-injection worker/reader helpers).
  - Clear the shared routing-map cache in tearDownClass / asyncTearDown so module-level state does not leak across test classes in the same process.
  - Use parents=() (an immutable tuple) instead of parents=[] to match the PKRange namedtuple contract and preserve deep immutability.
  - Update stale docstring/inline comments in refresh_routing_map_provider and a test docstring to reflect the in-place clear() of the shared cache instead of the old 'create a new provider instance' wording.
  - Drop the brittle sys.getsizeof(pkr) < 100 assertion from test_range_has_slots; the __slots__ contract is already verified via hasattr(__dict__).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* docs(cosmos): explain shared routing-map cache module-level globals

  Add per-line comments above each of the five module-level globals in both the sync and async routing_map_provider.py describing:

  - _shared_routing_map_cache: the actual cached routing maps shared across every client for an endpoint
  - _shared_collection_locks: per-collection single-flight refresh lock
  - _shared_locks_locks: guards the creation of new collection-locks to preserve the single-flight invariant under races
  - _shared_cache_refcounts: ref-count of live clients per endpoint, used to GC the entry when the last client closes
  - _shared_cache_lock: process-wide threading.Lock guarding all four dicts; intentionally threading (not asyncio) so it can be shared between sync and async paths and across event loops

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix(cosmos): scope async pk-range locks per event loop, reset cache between tests

  Two related fixes flagged in deep review of the shared partition-key-range cache:

  F1 — async locks at module scope broke across event loops

  asyncio.Lock binds to the event loop on first acquire (CPython 3.10+) and raises 'RuntimeError: ... bound to a different event loop' if reused from another running loop. Both _shared_locks_locks (per-endpoint meta-lock) and _shared_collection_locks (per-collection refresh lock) held module-level asyncio.Lock instances, which fails for:

  * pytest-asyncio's default function-scoped event loop (the second async test against the same emulator endpoint hits the bug)
  * re-entrant asyncio.run() (uvicorn worker reload, jupyter kernel restart, multiprocessing fork)

  Fix:

  * _shared_locks_locks: asyncio.Lock -> threading.Lock. Its critical sections are pure dict reads/writes with no awaits, so a threading lock is identical in semantics and loop-agnostic.
  * _shared_collection_locks: keyed by (loop_id, collection_id) instead of just collection_id. _get_lock_for_collection now uses id(asyncio.get_running_loop()) so each loop owns its own asyncio.Lock and single-flighting is correctly scoped per loop.

  F3 — no autouse fixture clearing shared globals between tests

  Existing test base classes construct CosmosClient without 'with', leaving refcount entries pinned for the test process lifetime. The new shared-cache test files added their own cache-clear teardowns but only for _shared_routing_map_cache, missing _shared_collection_locks, _shared_locks_locks, and _shared_cache_refcounts; existing tests cleared nothing. Result: order-dependent failures and flakiness in any test that asserts on routing-map cache state or _ReadPartitionKeyRanges call counts.

  Fix: an autouse pytest fixture in tests/conftest.py that clears all four globals on both the sync and async modules after every test.

  (F2 — clear_cache stale-write race during in-flight refresh — deferred to a follow-up PR; needs a generation counter for a complete fix.)

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Fix shared-cache test fixture to preserve dict identity

  The autouse fixture cleared the _shared_routing_map_cache registry between tests, which orphaned the inner-dict references held by long-lived class-level CosmosClient fixtures (e.g. test_shared_cache_integration's self.client1). The next test that constructed a second client for the same endpoint got a brand-new inner dict, breaking the cache-sharing invariant the tests assert via assertIs.

  Now we only clear the *contents* of each per-endpoint cache dict (and per-endpoint locks dict). The registry mappings stay intact so existing clients continue to share the same inner objects, while the staleness between tests that motivated the fixture is still resolved.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* F2: preserve per-collection locks across clear_cache to keep single-flight

  clear_cache previously did self._collection_locks.clear() alongside the routing-map wipe. That opened a stale-write race:

  * An in-flight _fetch_routing_map holds a per-collection lock that was just removed from the dict.
  * It finishes its network call and writes into the (just-cleared) shared cache.
  * A concurrent arrival creates a brand-new lock for the same collection and races the in-flight refresher — both can write, last wins.

  Worst case: the in-flight result pre-dates the cause of clear_cache (e.g. a 410 split notification), so a stale routing map lives in the cache as fresh until the next force-refresh.

  Fix: do not touch self._collection_locks in clear_cache. The in-flight holder still owns its lock; the next arrival acquires the same lock and serialises behind the in-flight write, preserving the single-flight invariant. The locks dict is still cleaned up in release() when the endpoint refcount hits zero.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address xinlian12 review + fix test_multi_client_shared_cache_queries

  Fixes from the @sdkReviewAgent inline comments on PR #46297 plus the CI test failure introduced by the conftest reset fixture.

  C1 — TOCTOU on _released (sync + async release()): Move the check-and-set of self._released INSIDE the _shared_cache_lock block. Previously two concurrent callers (e.g. __exit__ racing __del__) could both pass the early-return guard before either set the flag, then both decrement the refcount. Added a threaded barrier-based regression test that demonstrates the fix.

  C2 — Sync CosmosClient.close(): Added close() to the sync CosmosClient, mirroring the async client's close(). Now that release() manages process-global refcounts, users that don't use 'with' need a deterministic teardown path. Delegates to __exit__.
  C3 — Comment correctness: Fixed a misleading comment on _shared_cache_lock claiming the sync and async modules share state — they don't, each module has its own globals. Also fixed the refcount comment that said clear_cache decrements (it does not — only release() does).

  C4 — _session.py:386 regression coverage: Added focused unit tests in test_session_token_unit.py for the list(pk_range[0].get('parents') or ()) migration: PKRange-tuple input, None parents, empty parents, tuple parents, and the parents-then-self walk semantics.

  C5 — release() lifecycle coverage: Added 8 sync + 4 async lifecycle tests in tests/routing/:
  - construct increments refcount
  - release decrements / multi-client decrement
  - release evicts all four globals at zero
  - release does not evict with other clients alive
  - release is idempotent (sequential double-call)
  - concurrent release does not double-decrement (TOCTOU regression)
  - __del__ fallback releases when client teardown was skipped
  - clear_cache does not change the refcount

  Test failure fix — test_multi_client_shared_cache_queries: Added a _populate_cache helper to the sync integration test that calls PartitionKeyRangeCache.get_routing_map directly (mirroring the async sibling test). The previous version asserted that query_items(... cross_partition=True) populated _collection_routing_map_by_item, which is an implementation detail. The autouse conftest fixture exposed this fragility — the test had been passing only by accident due to cache state left by earlier tests.

  Teardown completeness: Updated tearDown / tearDownClass in both routing/test_shared_pk_range_cache(_async).py and test_shared_cache_integration(_async).py to clear ALL FOUR shared-cache globals (_shared_routing_map_cache, _shared_collection_locks, _shared_locks_locks, _shared_cache_refcounts) rather than only the routing-map dict. Avoids order-dependent leaks and refcount drift.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Fix Build Analyze: pylint C4732/C4739 + cspell TOCTOU

  - cosmos_client.py: disable specify-parameter-names-in-call on __exit__(None, None, None) — the sentinels are positional by Python convention.
  - routing_range.py: add :param/:returns/:rtype to the PKRange.__contains__ docstring.
  - cspell.json: add 'toctou' to ignoreWords (used in race-condition comments).

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address xinlian Apr 24 review: sync clear_cache + retain status/throughputFraction

  Address xinlian12's two latest review comments on PR #46297, plus retain two non-routing PKR fields based on bluebird-grounded review.

  clear_cache: async -> sync
  - aio/routing_map_provider.py: clear_cache is no longer async (no awaits inside; uses threading.Lock + dict.clear()). Mirrors the sync release() signature.
  - aio/_cosmos_client_connection_async.py: drop await from the 2 callers.
  - tests/test_partition_split_retry_unit_async.py: AsyncMock -> MagicMock.
  - tests/routing/test_shared_pk_range_cache_async.py, tests/test_shared_cache_fault_injection_async.py, tests/test_shared_cache_integration_async.py: drop await from clear_cache call sites in async tests.

  PKRange: retain status and throughputFraction
  - routing_range.py: add status and throughputFraction to the _PKRangeBase namedtuple with defaults=(None, None) for back-compat. Add Status and ThroughputFraction constants.
  - collection_routing_map.py + _routing_map_provider_common.py: propagate both fields when constructing PKRange from raw service dicts (full-load and incremental merge paths).

  Tests
  - test_shared_pk_range_cache.py: add test_pkrange_contains_truthy_presence_for_parents covering parents=() (the most common production case, a partition that has never split).
  - test_shared_pk_range_cache.py: add test_pkrange_status_and_throughput_fraction_fields_roundtrip covering default-None back-compat plus explicit values via dict-style access and the __contains__ truthy-presence semantic.

  All 143 routing/cache/split-retry tests pass locally against a live account.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Revert .gitignore changes — keep the PR diff scoped to the PKR cache work

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address xinlian review: dedupe _resolve_endpoint + PKRange construction

  Per xinlian's review (PR #46297), two duplications were called out:

  1. _resolve_endpoint() was identical in the sync and async modules. Moved to _routing_map_provider_common.py; both modules import the shared implementation. Prevents silent fallback-shape divergence that would fragment the per-endpoint shared cache.
  2. PKRange construction was duplicated in both code paths:
     - collection_routing_map._build_routing_map_from_ranges (full build)
     - _routing_map_provider_common.process_fetched_ranges (incremental merge)

     Added a PKRange.from_dict(raw) classmethod factory in routing_range.py; both call sites now use it. The field-mapping policy lives in exactly one place — adding/removing a field touches one line, not two.

  Net diff: 32 lines deduplicated across 3 files. No behavior change. All 143 existing tests in tests/routing/, tests/test_partition_split_retry_unit*, tests/test_shared_cache_integration*, tests/test_shared_cache_fault_injection_async still pass against tomasvaron-cdb.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: pylint C4740 — restore the type annotation on _resolve_endpoint

  The Build Analyze pylint job (build 6228688) flagged C4740(docstring-missing-type) on _resolve_endpoint after it was moved to _routing_map_provider_common.py in eab73eb. The original sync/async versions had `client: Any` and `-> str` annotations plus the matching `:type client: Any` docstring line — those were dropped during the move. Restored the function signature to `def _resolve_endpoint(client: Any) -> str:` (matching the originals) and added the missing `:type client: Any` docstring entry.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: tvaron3 <tvaron3@users.noreply.github.com>
Co-authored-by: Tomas Varon <tvaron@microsoft.com>
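The from_dict factory and the defaulted status/throughputFraction fields described in the last few commits can be sketched together. This is a reconstruction from the commit text (the classmethod body is an assumption, not the SDK's exact code); the raw dict includes an extra field to show how non-retained service fields are stripped at conversion time:

```python
from collections import namedtuple

# defaults apply to the rightmost fields: status and throughputFraction are
# optional, preserving back-compat with 4-field PKRange construction.
_PKRangeBase = namedtuple(
    "_PKRangeBase",
    ["id", "minInclusive", "maxExclusive", "parents", "status", "throughputFraction"],
    defaults=(None, None),
)


class PKRange(_PKRangeBase):
    __slots__ = ()

    @classmethod
    def from_dict(cls, raw: dict) -> "PKRange":
        # The single place that maps raw service dicts -> compact tuples;
        # adding or removing a retained field touches only this method.
        return cls(
            id=raw["id"],
            minInclusive=raw["minInclusive"],
            maxExclusive=raw["maxExclusive"],
            parents=tuple(raw.get("parents") or ()),  # tuple for deep immutability
            status=raw.get("status"),
            throughputFraction=raw.get("throughputFraction"),
        )


raw = {"id": "1", "minInclusive": "", "maxExclusive": "FF",
       "parents": ["0"], "ridPrefix": 7}  # ridPrefix is dropped on conversion
pkr = PKRange.from_dict(raw)
```

Both construction paths (the full routing-map build and the incremental merge) calling one factory is what makes the memory-trimming policy a one-line change.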
1 parent 44cfa46 commit b01afa8

28 files changed

Lines changed: 1773 additions & 79 deletions

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 * Fixed bug where `CosmosClient` construction with AAD credentials would crash at startup if the semantic reranking inference endpoint environment variable was not set, even when semantic reranking was not being used. The inference service is now lazily initialized on first use. See [PR 46243](https://github.com/Azure/azure-sdk-for-python/pull/46243)

 #### Other Changes
+* Reduced per-client memory overhead when partition-level circuit breaker (PPCB) is enabled by sharing the partition key range routing map cache across CosmosClient instances connected to the same endpoint, and stripping unused fields from cached partition key ranges using compact PKRange namedtuples. See [PR 46297](https://github.com/Azure/azure-sdk-for-python/pull/46297)

 ### 4.16.0b2 (2026-04-04)

sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py

Lines changed: 7 additions & 4 deletions
@@ -3591,7 +3591,8 @@ def refresh_routing_map_provider(

         If collection_link is provided, refreshes only that collection.
         When previous_routing_map is provided this is incremental; otherwise this is a collection-scoped repopulation.
-        Without collection_link, it creates a new provider instance for a full refresh.
+        Without collection_link, it clears the shared routing-map cache in place
+        so the next request for any collection re-fetches from the service.

         :param str collection_link: The collection link.
         :param object previous_routing_map: The routing map that is considered stale.
@@ -3634,12 +3635,14 @@ def refresh_routing_map_provider(
                     status_code,
                 )
             else:
-                # Full refresh - create a new provider instance. This clears all cached routing maps.
-                self._routing_map_provider = routing_map_provider.SmartRoutingMapProvider(self)
+                # Full refresh - clear the shared routing-map cache in place so all
+                # clients sharing this endpoint re-fetch on next use. The provider
+                # instance itself is preserved (shared cache design).
+                self._routing_map_provider.clear_cache()
             return

         # Fallback to full refresh when targeted refresh fails transiently.
-        self._routing_map_provider = routing_map_provider.SmartRoutingMapProvider(self)
+        self._routing_map_provider.clear_cache()

     def _refresh_container_properties_cache(self, container_link: str):
         # If container properties cache is stale, refresh it by reading the container.
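The reason the hunk above clears the cache in place instead of rebinding the provider can be shown with a two-line sketch (variable names are illustrative): other clients hold references to the same shared dict, and rebinding would orphan them while `dict.clear()` is observed by every holder:

```python
# One shared per-endpoint cache entry, referenced by multiple clients.
shared_cache = {"collection-rid-1": "stale-routing-map"}
client_a_view = shared_cache   # a second client sharing the endpoint entry
client_b_view = shared_cache

# Rebinding (the old behaviour) would NOT be seen by the other views:
#   shared_cache = {}   ->  client_a_view still holds the stale map

shared_cache.clear()           # in-place wipe: every holder observes it

assert client_a_view == {} and client_a_view is shared_cache
assert client_b_view is shared_cache   # identity preserved, sharing intact
```

This identity-preservation property is exactly what the F2/F8 review fixes in the commit message assert on.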

sdk/cosmos/azure-cosmos/azure/cosmos/_partition_health_tracker.py

Lines changed: 11 additions & 0 deletions
@@ -50,6 +50,17 @@ class _PartitionHealthInfo(object):
     """
     This internal class keeps the health and statistics for a partition.
     """
+    # __slots__ reduces per-instance memory by using a fixed-size C array
+    # instead of a per-instance __dict__. Significant when tracking many partitions.
+    __slots__ = (
+        'write_failure_count',
+        'read_failure_count',
+        'write_success_count',
+        'read_success_count',
+        'read_consecutive_failure_count',
+        'write_consecutive_failure_count',
+        'unavailability_info',
+    )

     def __init__(self) -> None:
         self.write_failure_count: int = 0
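The saving claimed in the diff above comes from eliminating the per-instance `__dict__`: a slotted class stores attributes at fixed offsets in the object itself. A rough illustration (class names are made up for the demo; exact byte counts vary by Python version, so none are asserted):

```python
import sys


class HealthInfoDict:
    def __init__(self):
        self.write_failure_count = 0
        self.read_failure_count = 0


class HealthInfoSlots:
    __slots__ = ("write_failure_count", "read_failure_count")

    def __init__(self):
        self.write_failure_count = 0
        self.read_failure_count = 0


plain, slotted = HealthInfoDict(), HealthInfoSlots()
assert hasattr(plain, "__dict__")
assert not hasattr(slotted, "__dict__")  # no growable dict per instance

# The per-instance dict is the dominant per-object cost the change removes:
print(sys.getsizeof(plain.__dict__), sys.getsizeof(slotted))
```

A side effect worth noting: slotted instances reject attributes outside `__slots__` with an AttributeError, which is why the tests verify the contract via `hasattr(obj, "__dict__")`.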

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/_routing_map_provider_common.py

Lines changed: 28 additions & 2 deletions
@@ -34,6 +34,7 @@
 from .collection_routing_map import CollectionRoutingMap, _build_routing_map_from_ranges
 from . import routing_range
 from .routing_range import (
+    PKRange,
     PartitionKeyRange,
     _is_sorted_and_non_overlapping,
     _subtract_range,
@@ -122,6 +123,31 @@ def prepare_fetch_options_and_headers(


+
+def _resolve_endpoint(client: Any) -> str:
+    """Return a cache key for ``client``'s endpoint.
+
+    Falls back to ``__unknown_<id>__`` when ``client`` has no ``url_connection``
+    so unknown/mocked clients are isolated rather than collapsed into a single
+    shared cache entry.
+
+    Centralized here so the sync (``routing_map_provider``) and async
+    (``aio.routing_map_provider``) modules use exactly the same fallback shape
+    — a divergence here would silently fragment the per-endpoint shared cache.
+
+    :param client: The CosmosClient (or compatible) instance whose endpoint
+        will be used as the shared-cache key.
+    :type client: Any
+    :returns: The endpoint URL string, or a per-instance fallback key when the
+        client does not expose ``url_connection``.
+    :rtype: str
+    """
+    try:
+        return client.url_connection
+    except AttributeError:
+        return f"__unknown_{id(client)}__"
+
+
 class _NeedFullRefresh(Exception):
     """Sentinel raised by :func:`process_fetched_ranges` when the
     incremental update cannot be completed and a full refresh is needed."""
@@ -186,7 +212,7 @@ def process_fetched_ranges(
     # Incremental update -- merge deltas into the existing map.
     # Resolve parent chains transitively within this single delta so cascading
     # splits (A->B+C and B->D+E in one payload) can be merged incrementally.
-    range_tuples: List[Tuple[Dict[str, Any], Any]] = []
+    range_tuples: List[Tuple[Any, Any]] = []
     known_range_info_by_id = {
         pkr_id: pkr_tuple[1]
         for pkr_id, pkr_tuple in previous_routing_map._rangeById.items()  # pylint: disable=protected-access
@@ -209,7 +235,7 @@ def process_fetched_ranges(
             next_unresolved.append(r)
             continue

-        range_tuples.append((r, range_info))
+        range_tuples.append((PKRange.from_dict(r), range_info))
         known_range_info_by_id[r[PartitionKeyRange.Id]] = range_info
         progress_made = True

sdk/cosmos/azure-cosmos/azure/cosmos/_routing/aio/routing_map_provider.py

Lines changed: 142 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@
2424
"""
2525
import asyncio # pylint: disable=do-not-import-asyncio
2626
import logging
27+
import threading
2728
from typing import Dict, Any, Optional, List, TYPE_CHECKING
2829
from azure.core.utils import CaseInsensitiveDict
2930
from ... import _base, http_constants
3031
from ..collection_routing_map import CollectionRoutingMap
3132
from ...exceptions import CosmosHttpResponseError
3233
from .._routing_map_provider_common import (
34+
_resolve_endpoint,
3335
prepare_fetch_options_and_headers,
3436
process_fetched_ranges,
3537
is_cache_unchanged_since_previous,
@@ -41,6 +43,60 @@
4143

4244
if TYPE_CHECKING:
4345
from ...aio._cosmos_client_connection_async import CosmosClientConnection
46+
47+
# Module-level shared state, keyed by endpoint URL. All four dicts and the
48+
# refcount are mutated only while holding ``_shared_cache_lock``. Sharing across
49+
+# every async CosmosClient that targets the same endpoint is what eliminates
+# the per-client duplicate copies of the routing map (the memory win driving
+# this change), and what lets concurrent readers single-flight a single
+# refresh.
+
+# endpoint -> { collection_id -> CollectionRoutingMap }. The actual cached
+# routing maps. The inner dict is shared by every client for that endpoint, so
+# a routing map populated by one client is immediately visible to all others.
+_shared_routing_map_cache: dict = {}
+
+# endpoint -> { (loop_id, collection_id) -> asyncio.Lock }. Per-collection
+# refresh lock, scoped to the asyncio event loop that owns it. We key by loop
+# id (``id(asyncio.get_running_loop())``) because ``asyncio.Lock`` instances
+# bind to the loop on first ``acquire()`` (CPython 3.10+) and raise
+# ``RuntimeError: ... bound to a different event loop`` if reused from a
+# different running loop. Single-flighting only needs to be per-loop in
+# practice — coroutines on different loops have different connection pools
+# and are effectively independent clients.
+_shared_collection_locks: Dict[str, Dict[tuple, asyncio.Lock]] = {}
+
+# endpoint -> threading.Lock. Guards the creation of new entries in the inner
+# dict of ``_shared_collection_locks``. Was an ``asyncio.Lock`` previously,
+# but its critical sections are pure dict reads/writes (no await), so a
+# ``threading.Lock`` works identically and avoids the same loop-binding
+# hazard described above. Without this guard, two coroutines racing on a
+# brand-new (loop, collection_id) could each create a different Lock object
+# and defeat the single-flight invariant.
+_shared_locks_locks: Dict[str, threading.Lock] = {}
+
+# endpoint -> int. Number of live async ``PartitionKeyRangeCache`` instances
+# using this endpoint. Incremented on construction and decremented in
+# ``release`` (called from ``CosmosClient.__aexit__`` / ``close`` / ``__del__``).
+# When the count hits zero we drop the entry from all four dicts so an idle
+# endpoint does not pin memory forever. ``clear_cache`` does NOT touch this
+# count — it only wipes routing-map contents.
+_shared_cache_refcounts: Dict[str, int] = {}
+
+# Process-wide lock guarding the four dicts above for *this* (async) module.
+# Note: the sync module ``_routing/routing_map_provider.py`` defines its own
+# independent set of module-level dicts and its own ``_shared_cache_lock`` —
+# state is NOT shared between the sync and async modules. A sync and an async
+# ``CosmosClient`` targeting the same endpoint maintain separate routing-map
+# caches. Using a ``threading.Lock`` (not an ``asyncio.Lock``) is also
+# essential for correctness across multiple event loops in the same process:
+# an ``asyncio.Lock`` binds to the loop that first acquires it. The critical
+# sections this lock guards are pure dict reads/writes — never await, never
+# network I/O — so a brief threading-lock acquisition from a coroutine is
+# safe and does not block the event loop in any meaningful way.
+_shared_cache_lock = threading.Lock()
+
 # pylint: disable=protected-access
 
 logger = logging.getLogger(__name__)
@@ -64,25 +120,99 @@ def __init__(self, client: Any):
         """
 
         self._document_client = client
+        self._endpoint = _resolve_endpoint(client)
+        self._released = False
+
+        # Share routing map cache, per-collection asyncio locks, and the
+        # per-endpoint meta-lock that guards the per-collection-lock dict
+        # across all clients with the same endpoint. Refcount lets us evict
+        # the entry when the last sharing client releases it (see ``release``).
+        with _shared_cache_lock:
+            if self._endpoint not in _shared_routing_map_cache:
+                _shared_routing_map_cache[self._endpoint] = {}
+                _shared_collection_locks[self._endpoint] = {}
+                _shared_locks_locks[self._endpoint] = threading.Lock()
+                _shared_cache_refcounts[self._endpoint] = 0
+            _shared_cache_refcounts[self._endpoint] += 1
+            self._collection_routing_map_by_item = _shared_routing_map_cache[self._endpoint]
+            self._collection_locks: Dict[tuple, asyncio.Lock] = _shared_collection_locks[self._endpoint]
+            self._locks_lock: threading.Lock = _shared_locks_locks[self._endpoint]
+
+    def clear_cache(self):
+        """Clear the shared routing map cache for this endpoint.
+
+        Uses in-place ``.clear()`` on the routing-map dict to preserve all
+        client references to the same dict object, so concurrent clients
+        sharing the endpoint continue to share a single cache instance.
+
+        The per-collection locks dict is intentionally **not** cleared here:
+        an in-flight ``_fetch_routing_map`` caller holds one of those locks
+        and will write its result into the (now-empty) shared cache when it
+        completes. Keeping the lock in place ensures that any concurrent
+        arrival serialises behind the in-flight refresh (single-flight
+        invariant) instead of racing it with a fresh lock. The locks dict
+        is evicted in ``release()`` once the endpoint refcount hits zero.
+        """
+        with _shared_cache_lock:
+            if self._endpoint in _shared_routing_map_cache:
+                _shared_routing_map_cache[self._endpoint].clear()
+
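Why the in-place ``.clear()`` matters can be shown with plain dicts. This is an illustrative sketch, not SDK code: clearing preserves object identity, while rebinding would silently fork the cache.

```python
shared_cache = {"coll1": "routing-map"}

# A second client holds a reference to the *same* dict object.
other_client_view = shared_cache

# In-place clear: identity survives, so every holder sees the wipe.
shared_cache.clear()
assert other_client_view is shared_cache
assert other_client_view == {}

# Rebinding instead would fork the cache: the other client would keep
# reading and writing the now-orphaned dict.
shared_cache = {}
other_client_view["coll1"] = "stale-entry"
assert "coll1" not in shared_cache
```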
+    def release(self) -> None:
+        """Decrement the per-endpoint refcount and evict shared state at zero.
-
-        # keeps the cached collection routing map by collection id
-        self._collection_routing_map_by_item: Dict[str, CollectionRoutingMap] = {}
-        # A lock to control access to the locks dictionary itself
-        self._locks_lock = asyncio.Lock()
-        # A dictionary to hold a lock for each collection ID
-        self._collection_locks: Dict[str, asyncio.Lock] = {}
+
+        Safe to call multiple times concurrently. Best-effort: never raises.
+
+        The ``_released`` check-and-set is performed *inside* the shared
+        cache lock to close the TOCTOU window between two concurrent callers
+        (e.g. ``CosmosClient.__aexit__`` racing the GC's ``__del__``).
+        Without the lock, both callers could pass the early-return guard
+        before either set the flag, then both would decrement the refcount.
+        """
+        endpoint = self._endpoint
+        try:
+            with _shared_cache_lock:
+                if self._released:
+                    return
+                self._released = True
+                count = _shared_cache_refcounts.get(endpoint, 0) - 1
+                if count <= 0:
+                    _shared_cache_refcounts.pop(endpoint, None)
+                    _shared_routing_map_cache.pop(endpoint, None)
+                    _shared_collection_locks.pop(endpoint, None)
+                    _shared_locks_locks.pop(endpoint, None)
+                else:
+                    _shared_cache_refcounts[endpoint] = count
+        except Exception:  # pylint: disable=broad-except
+            # release() may be called from __del__ during interpreter shutdown
+            # where module globals may already be torn down.
+            pass
+
+    def __del__(self):
+        # Defensive fallback in case the owning client teardown path didn't
+        # call release(). Must never raise.
+        try:
+            self.release()
+        except Exception:  # pylint: disable=broad-except
+            pass

     async def _get_lock_for_collection(self, collection_id: str) -> asyncio.Lock:
-        """Safely gets or creates a lock for a given collection ID.
+        """Safely gets or creates a lock for a given (loop, collection) pair.
+
+        Scoped to the running event loop so the returned ``asyncio.Lock`` is
+        always bound to the loop that will await it — see the comment on
+        ``_shared_collection_locks`` for the loop-binding rationale.
 
         :param str collection_id: The ID of the collection.
-        :return: An asyncio.Lock specific to the collection ID.
+        :return: An asyncio.Lock specific to the (loop, collection) pair.
         :rtype: asyncio.Lock
         """
-        async with self._locks_lock:
-            if collection_id not in self._collection_locks:
-                self._collection_locks[collection_id] = asyncio.Lock()
-            return self._collection_locks[collection_id]
+        key = (id(asyncio.get_running_loop()), collection_id)
+        with self._locks_lock:
+            lock = self._collection_locks.get(key)
+            if lock is None:
+                lock = asyncio.Lock()
+                self._collection_locks[key] = lock
+            return lock
 
     def _is_cache_stale(
         self,
sdk/cosmos/azure-cosmos/azure/cosmos/_routing/collection_routing_map.py

Lines changed: 5 additions & 2 deletions

@@ -27,7 +27,7 @@
 from typing import Optional, Union
 
 from azure.cosmos._routing import routing_range
-from azure.cosmos._routing.routing_range import PartitionKeyRange
+from azure.cosmos._routing.routing_range import PartitionKeyRange, PKRange
 
 # pylint: disable=line-too-long
 class CollectionRoutingMap(object):
@@ -288,7 +288,10 @@ def _build_routing_map_from_ranges(
             if PartitionKeyRange.Parents in r and r[PartitionKeyRange.Parents]:
                 gone_range_ids.update(r[PartitionKeyRange.Parents])
 
-        filtered_ranges = [r for r in ranges if r[PartitionKeyRange.Id] not in gone_range_ids]
+        filtered_ranges = [
+            PKRange.from_dict(r)
+            for r in ranges if r[PartitionKeyRange.Id] not in gone_range_ids
+        ]
         range_tuples = [(r, True) for r in filtered_ranges]
 
         routing_map = CollectionRoutingMap.CompleteRoutingMap(
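The ``PKRange`` record imported here lives in ``routing_range.py``, which is not part of this hunk. Based only on the commit notes (four retained fields, ``_PKRangeBase`` namedtuple, dict-style access, raise-from in ``__getitem__``), a hedged reconstruction of its shape might look like:

```python
from collections import namedtuple

# Field names follow the Cosmos partition key range payload; this is a
# sketch inferred from the commit message, not the SDK's actual source.
_PKRangeBase = namedtuple("_PKRangeBase", ["id", "minInclusive", "maxExclusive", "parents"])

class PKRange(_PKRangeBase):
    """Compact partition key range record (reconstruction, not SDK code)."""
    __slots__ = ()  # no per-instance __dict__: tuple storage only

    @classmethod
    def from_dict(cls, raw: dict) -> "PKRange":
        # Retain only the four fields the routing map needs, dropping the
        # rest of the service response payload.
        return cls(raw["id"], raw["minInclusive"], raw["maxExclusive"], raw.get("parents"))

    def __getitem__(self, key):
        # Dict-style access (r["id"]) for callers that still index by name;
        # integer indexing keeps normal tuple behaviour.
        if isinstance(key, str):
            try:
                return getattr(self, key)
            except AttributeError as exc:
                raise KeyError(key) from exc  # raise-from per pylint W0707
        return super().__getitem__(key)

r = PKRange.from_dict(
    {"id": "0", "minInclusive": "", "maxExclusive": "FF", "parents": [], "extra": 1}
)
assert r["id"] == "0" and r.maxExclusive == "FF"  # extra payload fields dropped
```

Keeping dict-style access is what lets ``range_tuples = [(r, True) for r in filtered_ranges]`` and downstream consumers treat the namedtuples exactly like the raw response dicts they replace.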
