Skip to content

Commit a666f4b

Browse files
committed
fix(serve): guard against NULL request_router in update_deployment_config
When AsyncioRouter.update_deployment_config() is called before the request_router has been lazily initialised (e.g. request_router_class is None, or the first config update arrives before any replica is assigned), the self.request_router property returns None. The previous code unconditionally evaluated: len(self.request_router.curr_replicas) which raises AttributeError: 'NoneType' object has no attribute 'curr_replicas'. Fix: cache the property result in a local variable and fall back to 0 when it is None. Zero is semantically correct because no replicas are active at that point. Regression tests added in TestUpdateDeploymentConfigNullRouter: - test_update_deployment_config_with_null_router_no_exception - test_update_deployment_config_with_null_router_passes_zero_replicas - test_update_deployment_config_with_initialized_router_passes_correct_count Signed-off-by: chenshi5012 <chenshi5012@163.com>
1 parent 35629a8 commit a666f4b

2 files changed

Lines changed: 121 additions & 1 deletion

File tree

python/ray/serve/_private/router.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -796,9 +796,15 @@ def update_deployment_config(self, deployment_config: DeploymentConfig):
796796
max_backoff_s=self._max_backoff_s,
797797
)
798798

799+
# Guard against the case where request_router is None (e.g., when
800+
# request_router_class is None and lazy initialization has not yet
801+
# occurred). In that scenario there are no active replicas yet, so
802+
# passing 0 is the correct and safe value.
803+
_router = self.request_router
804+
curr_num_replicas = len(_router.curr_replicas) if _router is not None else 0
799805
self._metrics_manager.update_deployment_config(
800806
deployment_config,
801-
curr_num_replicas=len(self.request_router.curr_replicas),
807+
curr_num_replicas=curr_num_replicas,
802808
)
803809

804810
async def _resolve_request_arguments(

python/ray/serve/tests/unit/test_router.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,6 +1864,120 @@ async def test_multiple_requests_each_trigger_on_request_completed(
18641864
assert completed_ids == expected_ids
18651865

18661866

1867+
1868+
@pytest.mark.asyncio
1869+
class TestUpdateDeploymentConfigNullRouter:
1870+
"""Regression tests for the NULL-deref fix in AsyncioRouter.update_deployment_config.
1871+
1872+
Before the fix, calling update_deployment_config() when request_router is
1873+
not yet initialized (i.e. self.request_router returns None) would raise an
1874+
AttributeError because the code unconditionally called
1875+
``len(self.request_router.curr_replicas)``.
1876+
"""
1877+
1878+
async def test_update_deployment_config_with_null_router_no_exception(self):
1879+
"""update_deployment_config must not raise when request_router is None.
1880+
1881+
Reproduces the scenario where request_router_class is None and
1882+
_request_router has not been initialised yet. The property
1883+
self.request_router returns None in that case, and the old code would
1884+
crash with:
1885+
AttributeError: 'NoneType' object has no attribute 'curr_replicas'
1886+
"""
1887+
router = AsyncioRouter(
1888+
controller_handle=Mock(),
1889+
deployment_id=DeploymentID(name="test-deployment"),
1890+
handle_id="test-handle-id",
1891+
self_actor_id="test-node-id",
1892+
handle_source=DeploymentHandleSource.UNKNOWN,
1893+
event_loop=get_or_create_event_loop(),
1894+
enable_strict_max_ongoing_requests=False,
1895+
# Deliberately pass neither request_router nor request_router_class
1896+
# so that self._request_router remains None and the property
1897+
# returns None.
1898+
request_router_class=None,
1899+
request_router=None,
1900+
node_id="test-node-id",
1901+
availability_zone="test-az",
1902+
prefer_local_node_routing=False,
1903+
_request_router_initialized_event=asyncio.Event(),
1904+
)
1905+
# Should not raise AttributeError.
1906+
router.update_deployment_config(DeploymentConfig())
1907+
1908+
async def test_update_deployment_config_with_null_router_passes_zero_replicas(self):
1909+
"""When request_router is None, curr_num_replicas forwarded to
1910+
MetricsManager must be 0 (not an exception)."""
1911+
router = AsyncioRouter(
1912+
controller_handle=Mock(),
1913+
deployment_id=DeploymentID(name="test-deployment"),
1914+
handle_id="test-handle-id",
1915+
self_actor_id="test-node-id",
1916+
handle_source=DeploymentHandleSource.UNKNOWN,
1917+
event_loop=get_or_create_event_loop(),
1918+
enable_strict_max_ongoing_requests=False,
1919+
request_router_class=None,
1920+
request_router=None,
1921+
node_id="test-node-id",
1922+
availability_zone="test-az",
1923+
prefer_local_node_routing=False,
1924+
_request_router_initialized_event=asyncio.Event(),
1925+
)
1926+
captured: dict = {}
1927+
original = router._metrics_manager.update_deployment_config
1928+
1929+
def _capture(deployment_config, curr_num_replicas):
1930+
captured["curr_num_replicas"] = curr_num_replicas
1931+
return original(deployment_config, curr_num_replicas)
1932+
1933+
router._metrics_manager.update_deployment_config = _capture
1934+
router.update_deployment_config(DeploymentConfig())
1935+
1936+
assert captured["curr_num_replicas"] == 0, (
1937+
f"Expected 0 replicas when router is None, "
1938+
f"got {captured['curr_num_replicas']}"
1939+
)
1940+
1941+
async def test_update_deployment_config_with_initialized_router_passes_correct_count(
1942+
self,
1943+
):
1944+
"""When request_router is initialised, curr_num_replicas reflects
1945+
the actual number of live replicas."""
1946+
fake_request_router = FakeRequestRouter(use_queue_len_cache=False)
1947+
r1_id = ReplicaID(
1948+
unique_id="test-replica-1", deployment_id=DeploymentID(name="test")
1949+
)
1950+
fake_request_router.set_replica_to_return(FakeReplica(r1_id))
1951+
1952+
router = AsyncioRouter(
1953+
controller_handle=Mock(),
1954+
deployment_id=DeploymentID(name="test-deployment"),
1955+
handle_id="test-handle-id",
1956+
self_actor_id="test-node-id",
1957+
handle_source=DeploymentHandleSource.UNKNOWN,
1958+
event_loop=get_or_create_event_loop(),
1959+
enable_strict_max_ongoing_requests=False,
1960+
request_router=fake_request_router,
1961+
node_id="test-node-id",
1962+
availability_zone="test-az",
1963+
prefer_local_node_routing=False,
1964+
_request_router_initialized_event=asyncio.Event(),
1965+
)
1966+
captured: dict = {}
1967+
original = router._metrics_manager.update_deployment_config
1968+
1969+
def _capture(deployment_config, curr_num_replicas):
1970+
captured["curr_num_replicas"] = curr_num_replicas
1971+
return original(deployment_config, curr_num_replicas)
1972+
1973+
router._metrics_manager.update_deployment_config = _capture
1974+
router.update_deployment_config(DeploymentConfig())
1975+
1976+
assert captured["curr_num_replicas"] == 1, (
1977+
f"Expected 1 replica, got {captured['curr_num_replicas']}"
1978+
)
1979+
1980+
18671981
@pytest.mark.asyncio
18681982
class TestCustomRequestRouterAPIs:
18691983
"""Tests for the new RequestRouter APIs introduced in this branch:

0 commit comments

Comments
 (0)