Skip to content
1 change: 1 addition & 0 deletions changes/11491.enhance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `AppProxyClientRegistry` to expose app-proxy domain clients (`DeploymentChatClient`) with the same lazy `@cached_property` pattern as `BackendAIClientRegistry`.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ split-on-trailing-comma = true
"src/ai/backend/client/utils.py" = ["PLC0415"]
# - Client v2 registry uses lazy imports to avoid circular dependencies with domain clients
"src/ai/backend/client/v2/registry.py" = ["PLC0415"]
# - Client v2 app-proxy registry uses lazy imports to avoid circular dependencies with domain clients
"src/ai/backend/client/v2/approxy_registry.py" = ["PLC0415"]
# - vfolder shim uses lazy imports to avoid circular dependency with api/rest/vfolder
"src/ai/backend/manager/api/vfolder.py" = ["PLC0415"]
# - Client func vfolder has progress printing
Expand Down
33 changes: 15 additions & 18 deletions src/ai/backend/client/cli/v2/deployment/chat/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
DeploymentChatConfig,
DeploymentChatHistory,
)
from ai.backend.client.cli.v2.helpers import create_v2_registry, load_v2_config
from ai.backend.client.cli.v2.helpers import (
create_appproxy_registry,
create_v2_registry,
load_v2_config,
)
from ai.backend.common.dto.clients.openai_compat import ChatCompletionRequest
from ai.backend.common.identifier.deployment import DeploymentID

Expand Down Expand Up @@ -96,8 +100,6 @@ def chat(
temperature and top_p differ between runtime variants — pass them
through ``--params``.
"""
from ai.backend.client.v2.config import ClientConfig
from ai.backend.client.v2.deployment_chat import DeploymentChatClient
from ai.backend.client.v2.exceptions import DeploymentAuthError

connection_config = load_v2_config()
Expand Down Expand Up @@ -135,22 +137,15 @@ async def _run() -> None:
cache.save()

token = chat_config.get_token(deployment_id)
# ``endpoint`` is required on ClientConfig but unused by AppProxyClient
# (deployment URLs are passed per-request); pass through the manager
# endpoint so the rest of the connection knobs (TLS, timeouts) match.
client_config = ClientConfig(
endpoint=connection_config.endpoint,
endpoint_type=connection_config.endpoint_type,
api_version=connection_config.api_version,
skip_ssl_verification=connection_config.skip_ssl_verification,
)
async with DeploymentChatClient(client_config) as client:
appproxy_registry = await create_appproxy_registry(connection_config)
try:
client = appproxy_registry.deployment_chat
# Resolution: --model > config.model (user-set) >
# cache.default_model (auto) > GET /v1/models (auto, cached).
request_model = (
model or chat_config.get_model(deployment_id) or endpoint_entry.default_model
)
try:
# Resolution: --model > config.model (user-set) >
# cache.default_model (auto) > GET /v1/models (auto, cached).
request_model = (
model or chat_config.get_model(deployment_id) or endpoint_entry.default_model
)
if request_model is None:
# No explicit --model, no user-set config, no cached
# default — ask the OpenAI-compat endpoint itself which
Expand Down Expand Up @@ -199,6 +194,8 @@ async def _run() -> None:
f"re-register with:\n"
f" ./bai deployment chat-config set {deployment_id} --token <token>"
) from e
finally:
await appproxy_registry.close()
# Only persist when both sides of the round are present, so the file
# never carries half-conversations that would skew future context.
assistant_message = response.assistant_message
Expand Down
21 changes: 21 additions & 0 deletions src/ai/backend/client/cli/v2/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from yarl import URL

if TYPE_CHECKING:
from ai.backend.client.v2.approxy_registry import AppProxyClientRegistry
from ai.backend.client.v2.v2_registry import V2ClientRegistry

CONFIG_DIR = Path.home() / ".backend.ai"
Expand Down Expand Up @@ -126,6 +127,26 @@ async def create_v2_registry(config: V2ConnectionConfig) -> V2ClientRegistry:
return await V2ClientRegistry.create(client_config, auth)


async def create_appproxy_registry(config: V2ConnectionConfig) -> AppProxyClientRegistry:
    """Construct an ``AppProxyClientRegistry`` out of a ``V2ConnectionConfig``.

    Deployment runtimes (vLLM / SGLang / NIM / TGI) sit behind the
    app-proxy rather than the manager API, so the manager's HMAC
    credentials and cookie jar are deliberately left out here — only the
    TLS / timeout knobs carried by the manager-side config are reused.
    """
    # Lazy imports keep this CLI helper free of a hard (circular) dependency
    # on the v2 client package at module-import time.
    from ai.backend.client.v2.approxy_registry import AppProxyClientRegistry
    from ai.backend.client.v2.config import ClientConfig

    proxy_config = ClientConfig(
        endpoint=config.endpoint,
        endpoint_type=config.endpoint_type,
        api_version=config.api_version,
        skip_ssl_verification=config.skip_ssl_verification,
    )
    return await AppProxyClientRegistry.create(proxy_config)


def parse_order_options(
order_by: tuple[str, ...],
order_field_enum: type,
Expand Down
43 changes: 43 additions & 0 deletions src/ai/backend/client/v2/approxy_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""App-proxy client registry.

Provides ``AppProxyClientRegistry`` which lazy-loads domain clients that
target inference runtimes fronted by Backend.AI's app-proxy (vLLM, SGLang,
NIM, TGI in Messages API mode, etc.). Mirrors the
:class:`BackendAIClientRegistry` pattern but uses
:class:`BackendAIAppProxyClient` (token-based, deployment URL per request)
instead of :class:`BackendAIAuthClient` (HMAC-signed manager API).
"""

from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

from .base_client import BackendAIAppProxyClient
from .config import ClientConfig

if TYPE_CHECKING:
from .deployment_chat import DeploymentChatClient


class AppProxyClientRegistry:
    """Lazy registry of domain clients that reach deployment runtimes via app-proxy.

    Mirrors the ``BackendAIClientRegistry`` pattern: domain clients are
    created on first access and memoized via ``cached_property``, all
    sharing one underlying :class:`BackendAIAppProxyClient` transport.
    """

    # Shared token-based HTTP transport; the registry owns its lifecycle.
    _client: BackendAIAppProxyClient

    def __init__(self, client: BackendAIAppProxyClient) -> None:
        self._client = client

    @classmethod
    async def create(cls, config: ClientConfig) -> AppProxyClientRegistry:
        """Alternate constructor: build the shared transport from *config*."""
        return cls(BackendAIAppProxyClient(config))

    async def close(self) -> None:
        """Release the underlying HTTP session."""
        await self._client.close()

    @cached_property
    def deployment_chat(self) -> DeploymentChatClient:
        # Imported lazily to break the circular dependency with the domain client.
        from .deployment_chat import DeploymentChatClient

        return DeploymentChatClient(self._client)
13 changes: 13 additions & 0 deletions src/ai/backend/client/v2/base_appproxy_domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .base_client import BackendAIAppProxyClient


class BaseAppProxyDomainClient:
    """Common base for domain clients that speak to deployment runtimes.

    Holds the shared :class:`BackendAIAppProxyClient` transport injected by
    the registry; subclasses issue their contract-specific requests through it.
    """

    # Low-level HTTP client; owned by the registry, merely borrowed here.
    _client: BackendAIAppProxyClient

    def __init__(self, client: BackendAIAppProxyClient) -> None:
        self._client = client
12 changes: 3 additions & 9 deletions src/ai/backend/client/v2/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,16 +464,10 @@ async def typed_request(


class BackendAIAppProxyClient:
"""HTTP client base for direct-to-deployment endpoints fronted by Backend.AI's app-proxy.
"""HTTP client for endpoints fronted by Backend.AI's app-proxy.

Unlike :class:`BackendAIAuthClient` (which signs requests with HMAC against
the Backend.AI manager API), this client targets the runtime's own HTTP
surface (vLLM / SGLang / NIM / TGI / custom) and uses an optional
``Authorization: Bearer <token>`` header. The deployment endpoint URL is
supplied per-request, not via :attr:`ClientConfig.endpoint`.

Subclasses add the contract-specific request methods (e.g. chat-completions,
/generate, etc.).
Uses an optional ``Authorization: Bearer <token>`` header; the target
URL is supplied per-request, not via :attr:`ClientConfig.endpoint`.
"""

_config: ClientConfig
Expand Down
12 changes: 7 additions & 5 deletions src/ai/backend/client/v2/deployment_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from typing import Any

from ai.backend.client.v2.base_client import BackendAIAppProxyClient
from ai.backend.client.v2.base_appproxy_domain import BaseAppProxyDomainClient
from ai.backend.common.dto.clients.openai_compat import (
ChatCompletionResponse,
ListModelsResponse,
Expand All @@ -26,14 +26,14 @@
_OPENAI_COMPATIBLE_MODELS_PATH = "/v1/models"


class DeploymentChatClient(BackendAIAppProxyClient):
class DeploymentChatClient(BaseAppProxyDomainClient):
"""OpenAI Chat Completions client for direct-to-deployment inference traffic.

Sends ``POST /v1/chat/completions`` with an OpenAI-shaped
``{model, messages, ...}`` JSON body. Compatible runtimes: vLLM,
SGLang, NVIDIA NIM, and TGI in Messages API mode. Vanilla TGI
(``/generate``) and arbitrary custom containers need a different
:class:`BackendAIAppProxyClient` subclass.
:class:`BaseAppProxyDomainClient` subclass.
"""

async def chat_completion(
Expand All @@ -42,7 +42,7 @@ async def chat_completion(
token: str | None,
body: dict[str, Any],
) -> ChatCompletionResponse:
payload = await self._request(
payload = await self._client._request(
"POST", endpoint_url, _OPENAI_COMPATIBLE_CHAT_PATH, token, body=body
)
return ChatCompletionResponse.model_validate(payload)
Expand All @@ -57,5 +57,7 @@ async def list_models(
Used to auto-derive a default model name when the caller did not
pass ``--model`` and no cached default is known.
"""
payload = await self._request("GET", endpoint_url, _OPENAI_COMPATIBLE_MODELS_PATH, token)
payload = await self._client._request(
"GET", endpoint_url, _OPENAI_COMPATIBLE_MODELS_PATH, token
)
return ListModelsResponse.model_validate(payload)
7 changes: 4 additions & 3 deletions tests/unit/client/v2/test_deployment_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from yarl import URL

from ai.backend.client.exceptions import BackendAPIError, BackendClientError
from ai.backend.client.v2.base_client import BackendAIAppProxyClient
from ai.backend.client.v2.config import ClientConfig
from ai.backend.client.v2.deployment_chat import DeploymentChatClient
from ai.backend.client.v2.exceptions import DeploymentAuthError
Expand All @@ -22,11 +23,11 @@
async def chat_client() -> AsyncIterator[DeploymentChatClient]:
    """Yield a ``DeploymentChatClient`` wrapping a real ``BackendAIAppProxyClient``.

    The transport session is always closed after the consuming test
    finishes, even when the test body raises.
    """
    # ``endpoint`` is required on ClientConfig but unused by AppProxyClient.
    config = ClientConfig(endpoint=URL("http://manager.unused"))
    appproxy_client = BackendAIAppProxyClient(config)
    try:
        yield DeploymentChatClient(appproxy_client)
    finally:
        # Close the shared session regardless of test outcome.
        await appproxy_client.close()


def _make_body() -> dict[str, Any]:
Expand Down
58 changes: 58 additions & 0 deletions tests/unit/client_v2/test_approxy_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from yarl import URL

from ai.backend.client.v2.approxy_registry import AppProxyClientRegistry
from ai.backend.client.v2.base_appproxy_domain import BaseAppProxyDomainClient
from ai.backend.client.v2.base_client import BackendAIAppProxyClient
from ai.backend.client.v2.config import ClientConfig
from ai.backend.client.v2.deployment_chat import DeploymentChatClient


def _build_appproxy_client(session: MagicMock | None = None) -> BackendAIAppProxyClient:
    """Construct a ``BackendAIAppProxyClient`` whose aiohttp session is a mock.

    Patches ``_create_aiohttp_session`` around the constructor call so that
    no running event loop is needed (aiohttp >= 3.13 raises otherwise).
    """
    mocked_session = MagicMock() if session is None else session
    with patch(
        "ai.backend.client.v2.base_client._create_aiohttp_session",
        return_value=mocked_session,
    ):
        client = BackendAIAppProxyClient(ClientConfig(endpoint=URL("https://api.example.com")))
    return client


class TestAppProxyClientRegistry:
    """Behavioral checks for ``AppProxyClientRegistry``."""

    @pytest.fixture
    def registry(self) -> AppProxyClientRegistry:
        # Registry over a transport whose aiohttp session is mocked out.
        return AppProxyClientRegistry(_build_appproxy_client())

    async def test_create_factory(self) -> None:
        config = ClientConfig(endpoint=URL("https://api.example.com"))
        with patch(
            "ai.backend.client.v2.base_client._create_aiohttp_session",
            return_value=MagicMock(),
        ):
            registry = await AppProxyClientRegistry.create(config)
        assert isinstance(registry._client, BackendAIAppProxyClient)

    def test_domain_clients_return_correct_types(self, registry: AppProxyClientRegistry) -> None:
        assert isinstance(registry.deployment_chat, DeploymentChatClient)

    def test_domain_clients_inherit_base(self, registry: AppProxyClientRegistry) -> None:
        assert isinstance(registry.deployment_chat, BaseAppProxyDomainClient)

    def test_cached_property_returns_same_instance(self, registry: AppProxyClientRegistry) -> None:
        # ``cached_property`` must hand back the identical object on re-access.
        assert registry.deployment_chat is registry.deployment_chat

    async def test_close_delegates_to_client(self) -> None:
        fake_session = AsyncMock()
        fake_session.closed = False
        registry = AppProxyClientRegistry(_build_appproxy_client(fake_session))
        await registry.close()
        fake_session.close.assert_awaited_once()
Loading