Skip to content

Commit 9c82ebb

Browse files
authored
Implement WebSocket support for InvocationAgentServerHost (#46841)
1 parent 17a2bf7 commit 9c82ebb

23 files changed

Lines changed: 2643 additions & 5 deletions

sdk/agentserver/azure-ai-agentserver-core/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Features Added
66

77
- Added `_platform_headers` module with cross-cutting protocol header name constants (`x-request-id`, `x-platform-server`, `x-agent-session-id`, `x-platform-error-source`, `x-platform-error-detail`, and others). Protocol packages now import shared header name strings from core instead of maintaining their own copies.
8+
- `AgentConfig.ws_ping_interval` — new field resolved from the `WS_KEEPALIVE_INTERVAL` environment variable (auto-injected by AgentService into hosted-agent containers). `0` disables; negative/non-finite values raise `ValueError` at startup. `AgentServerHost._build_hypercorn_config` wires this into Hypercorn's `websocket_ping_interval` so any protocol package serving WebSocket routes inherits keep-alive without per-package wiring.
89

910
## 2.0.0b3 (2026-04-22)
1011

sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,14 @@ async def _lifespan(_app: Starlette) -> AsyncGenerator[None, None]: # noqa: RUF
219219
cfg = self.config
220220
logger.info(
221221
"Platform environment: is_hosted=%s, agent_name=%s, agent_version=%s, "
222-
"port=%s, session_id=%s, sse_keepalive_interval=%s",
222+
"port=%s, session_id=%s, sse_keepalive_interval=%s, ws_ping_interval=%s",
223223
cfg.is_hosted,
224224
cfg.agent_name or _NOT_SET,
225225
cfg.agent_version or _NOT_SET,
226226
cfg.port,
227227
cfg.session_id or _NOT_SET,
228228
cfg.sse_keepalive_interval if cfg.sse_keepalive_interval > 0 else "disabled",
229+
f"{cfg.ws_ping_interval}s" if cfg.ws_ping_interval > 0 else "disabled",
229230
)
230231
logger.info(
231232
"Connectivity: project_endpoint=%s, otlp_endpoint=%s, appinsights_configured=%s",
@@ -417,6 +418,11 @@ def _build_hypercorn_config(self, host: str, port: int) -> object:
417418
config.graceful_timeout = float(self._graceful_shutdown_timeout)
418419
# Spec requires HTTP/1.1 only — disable HTTP/2
419420
config.h2_max_concurrent_streams = 0
421+
# WebSocket Ping/Pong keep-alive (RFC 6455 opcodes 0x9/0xA).
422+
# ``0`` (disabled) maps to Hypercorn's ``None`` sentinel; any
423+
# positive value is sent verbatim to Hypercorn.
424+
ws_ping = self.config.ws_ping_interval
425+
config.websocket_ping_interval = ws_ping if ws_ping > 0 else None # type: ignore[attr-defined]
420426
# Access logging
421427
if self._access_log is not None:
422428
config.accesslog = self._access_log # type: ignore[assignment]

sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_config.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@
3232
_ENV_APPLICATIONINSIGHTS_CONNECTION_STRING = "APPLICATIONINSIGHTS_CONNECTION_STRING"
3333
_ENV_OTEL_EXPORTER_OTLP_ENDPOINT = "OTEL_EXPORTER_OTLP_ENDPOINT"
3434
_ENV_SSE_KEEPALIVE_INTERVAL = "SSE_KEEPALIVE_INTERVAL"
35+
_ENV_WS_KEEPALIVE_INTERVAL = "WS_KEEPALIVE_INTERVAL"
3536

3637
_DEFAULT_PORT = 8088
3738
_DEFAULT_SSE_KEEPALIVE_INTERVAL = 0
39+
_DEFAULT_WS_PING_INTERVAL = 0.0
3840

3941

4042
# ======================================================================
@@ -64,6 +66,8 @@ class AgentConfig: # pylint: disable=too-many-instance-attributes
6466
:param appinsights_connection_string: Application Insights connection string.
6567
:param otlp_endpoint: OTLP exporter endpoint.
6668
:param sse_keepalive_interval: SSE keep-alive interval in seconds (0 = disabled).
69+
:param ws_ping_interval: WebSocket protocol Ping interval in seconds
70+
(``0`` disables keep-alive).
6771
"""
6872

6973
def __init__(
@@ -80,6 +84,7 @@ def __init__(
8084
appinsights_connection_string: str,
8185
otlp_endpoint: str,
8286
sse_keepalive_interval: int,
87+
ws_ping_interval: float = 0.0,
8388
) -> None:
8489
self.agent_name = agent_name
8590
self.agent_version = agent_version
@@ -92,6 +97,7 @@ def __init__(
9297
self.appinsights_connection_string = appinsights_connection_string
9398
self.otlp_endpoint = otlp_endpoint
9499
self.sse_keepalive_interval = sse_keepalive_interval
100+
self.ws_ping_interval = ws_ping_interval
95101

96102
@classmethod
97103
def from_env(cls) -> Self:
@@ -123,6 +129,7 @@ def from_env(cls) -> Self:
123129
_ENV_APPLICATIONINSIGHTS_CONNECTION_STRING, ""),
124130
otlp_endpoint=os.environ.get(_ENV_OTEL_EXPORTER_OTLP_ENDPOINT, ""),
125131
sse_keepalive_interval=resolve_sse_keepalive_interval(None),
132+
ws_ping_interval=resolve_ws_ping_interval(),
126133
)
127134

128135

@@ -322,3 +329,40 @@ def resolve_otlp_endpoint() -> Optional[str]:
322329
"""
323330
value = os.environ.get(_ENV_OTEL_EXPORTER_OTLP_ENDPOINT, "")
324331
return value if value else None
332+
333+
334+
def resolve_ws_ping_interval() -> float:
    """Return the WebSocket Ping/Pong keep-alive interval configured in the environment.

    The value is taken solely from the ``WS_KEEPALIVE_INTERVAL``
    environment variable (auto-injected by AgentService into hosted-agent
    containers); this function accepts no arguments. When the variable is
    unset or empty the default of ``0`` is returned, which means
    keep-alive is disabled. In tests, set the variable directly
    (e.g. via ``monkeypatch.setenv``).

    :return: The interval in seconds (``0`` means disabled).
    :rtype: float
    :raises ValueError: If the variable is set but does not parse as a
        number, or parses to a negative, NaN, or infinite value.
    """
    import math  # function-scope import — nothing else in this function's module path needs it here

    raw_value = os.environ.get(_ENV_WS_KEEPALIVE_INTERVAL)
    if not raw_value:
        # Covers both "unset" (None) and "set but empty" ("").
        return _DEFAULT_WS_PING_INTERVAL

    try:
        interval = float(raw_value)
    except ValueError as exc:
        raise ValueError(
            f"Invalid value for {_ENV_WS_KEEPALIVE_INTERVAL}: "
            f"{raw_value!r} (expected a non-negative number)"
        ) from exc

    # Accept only finite, non-negative values; NaN, +/-inf and negatives
    # all fall through to the error below.
    if math.isfinite(interval) and interval >= 0.0:
        return interval

    raise ValueError(
        f"Invalid value for {_ENV_WS_KEEPALIVE_INTERVAL}: "
        f"{raw_value!r} (expected a non-negative finite number)"
    )

sdk/agentserver/azure-ai-agentserver-invocations/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
### Features Added
66

77
- Error source classification headers: All HTTP error responses now include `x-platform-error-source` with a value of `user`, `platform`, or `upstream` to indicate which component caused the error. Developer handler exceptions and missing handler registrations are classified as `upstream`. Exceptions tagged with the platform error tag are classified as `platform` and additionally include `x-platform-error-detail` with truncated exception details (max 2048 characters) for diagnostics.
8+
- WebSocket protocol support — `InvocationAgentServerHost` now hosts `/invocations_ws` alongside `POST /invocations`. Register the handler with the new `@app.ws_handler` decorator. The route is registered lazily on first decoration, so hosts without a registered handler return HTTP 404.
9+
- WebSocket Ping/Pong keep-alive — disabled by default; enable by setting the `WS_KEEPALIVE_INTERVAL` env var (auto-injected by AgentService into hosted-agent containers; surfaced on `app.config.ws_ping_interval` in `azure-ai-agentserver-core>=2.0.0b4`). `0` (or unset) disables keep-alive. Wired through to Hypercorn's `websocket_ping_interval` by `AgentServerHost._build_hypercorn_config`.
10+
- WebSocket telemetry — single connection-scoped `websocket_session` OpenTelemetry span per WS connection, plus a structured close-event log line carrying `azure.ai.agentserver.invocations_ws.session_id`, `close_code`, and `duration_ms`. Session ID honours the `FOUNDRY_AGENT_SESSION_ID` env var for HTTP/WS correlation.
11+
- New samples: `samples/ws_invoke_agent/` (echo) and `samples/ws_bidirectional_streaming_agent/` (concurrent token streaming with cancel/bye control messages).
812

913
### Other Changes
1014

sdk/agentserver/azure-ai-agentserver-invocations/README.md

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Azure AI Agent Server Invocations client library for Python
22

3-
The `azure-ai-agentserver-invocations` package provides the invocation protocol endpoints for Azure AI Hosted Agent containers. It plugs into the [`azure-ai-agentserver-core`](https://pypi.org/project/azure-ai-agentserver-core/) host framework and adds the full invocation lifecycle: `POST /invocations`, `GET /invocations/{id}`, `POST /invocations/{id}/cancel`, and `GET /invocations/docs/openapi.json`.
3+
The `azure-ai-agentserver-invocations` package provides the invocation protocol endpoints for Azure AI Hosted Agent containers. It plugs into the [`azure-ai-agentserver-core`](https://pypi.org/project/azure-ai-agentserver-core/) host framework and supports two transports on the same host:
4+
5+
- **HTTP** (`invocations` protocol) — `POST /invocations`, `GET /invocations/{id}`, `POST /invocations/{id}/cancel`, `GET /invocations/docs/openapi.json`.
6+
- **WebSocket** (`invocations_ws` protocol) — full-duplex streaming at `/invocations_ws`, registered with `@app.ws_handler`.
47

58
## Getting started
69

@@ -25,6 +28,7 @@ This automatically installs `azure-ai-agentserver-core` as a dependency.
2528
- `@app.invoke_handler` — **Required.** Handles `POST /invocations`.
2629
- `@app.get_invocation_handler` — Optional. Handles `GET /invocations/{id}`.
2730
- `@app.cancel_invocation_handler` — Optional. Handles `POST /invocations/{id}/cancel`.
31+
- `@app.ws_handler` — Optional. Handles WebSocket connections at `/invocations_ws`.
2832

2933
### Protocol endpoints
3034

@@ -34,6 +38,7 @@ This automatically installs `azure-ai-agentserver-core` as a dependency.
3438
| `GET` | `/invocations/{invocation_id}` | No | Retrieve invocation status or result |
3539
| `POST` | `/invocations/{invocation_id}/cancel` | No | Cancel a running invocation |
3640
| `GET` | `/invocations/docs/openapi.json` | No | Serve the agent's OpenAPI 3.x spec |
41+
| `WS` | `/invocations_ws` | No | Full-duplex WebSocket transport (`invocations_ws` protocol) |
3742

3843
### Request and response headers
3944

@@ -182,6 +187,69 @@ app = InvocationAgentServerHost(openapi_spec={
182187
})
183188
```
184189

190+
## WebSocket protocol (`invocations_ws`)
191+
192+
The same `InvocationAgentServerHost` object also exposes a WebSocket transport at `/invocations_ws`. Container authors do not install or import a second package — registering an `@app.ws_handler` is the only step. A multi-protocol agent shares one host, one session, and one process.
193+
194+
### Quick start
195+
196+
```python
197+
from azure.ai.agentserver.invocations import InvocationAgentServerHost
198+
from starlette.requests import Request
199+
from starlette.responses import JSONResponse, Response
200+
from starlette.websockets import WebSocket
201+
202+
app = InvocationAgentServerHost()
203+
204+
205+
@app.invoke_handler # POST /invocations (HTTP)
206+
async def invoke(request: Request) -> Response:
207+
payload = await request.json()
208+
return JSONResponse({"echo": payload})
209+
210+
211+
@app.ws_handler # /invocations_ws (WebSocket)
212+
async def ws(websocket: WebSocket) -> None:
213+
async for message in websocket.iter_text():
214+
await websocket.send_text(message)
215+
216+
217+
app.run()
218+
```
219+
220+
### What the SDK does for `@app.ws_handler`
221+
222+
- Registers `/invocations_ws` on the same Starlette host as `/invocations` and `/readiness`.
223+
- Calls `await websocket.accept()` before invoking your handler.
224+
- Runs WebSocket Ping/Pong keep-alive in the background — disabled by default; enable by setting the `WS_KEEPALIVE_INTERVAL` environment variable (auto-injected by AgentService into hosted-agent containers). Set the value to `0` to disable. Frames are sent at the WebSocket protocol layer (RFC 6455 opcode `0x9`/`0xA`) by the underlying Hypercorn server, which keeps the connection alive across upstream proxy / load-balancer idle timeouts without any extra application traffic.
225+
- Closes the connection cleanly on handler return (close code `1000`) or maps an uncaught handler exception to close code `1011`.
226+
- Emits a structured close-event log line carrying `azure.ai.agentserver.invocations_ws.session_id`, `azure.ai.agentserver.invocations_ws.close_code`, and `azure.ai.agentserver.invocations_ws.duration_ms`. The same fields are recorded as OpenTelemetry span attributes so the connection lifetime is visible end-to-end.
227+
- Inherits `/readiness`, OpenTelemetry export, graceful shutdown, and the `x-platform-server` identity header from `azure-ai-agentserver-core`.
228+
229+
### Per-connection tracing
230+
231+
A WebSocket connection is wrapped by the SDK in a single connection-scoped `websocket_session` OpenTelemetry span. The span carries the GenAI semantic-convention attributes plus `azure.ai.agentserver.invocations_ws.session_id`, `azure.ai.agentserver.invocations_ws.close_code`, and `azure.ai.agentserver.invocations_ws.duration_ms`. Any child spans your handler opens — e.g. via `opentelemetry.trace.get_tracer(...).start_as_current_span(...)` — are automatically parented to the connection span.
232+
233+
### Handler signature
234+
235+
The handler receives a Starlette [`WebSocket`][starlette-ws] and returns `None`. The full WebSocket API — `iter_text`, `iter_bytes`, `iter_json`, `send_text`, `send_bytes`, `send_json`, `close`, `headers`, `query_params`, `client`, `state` — is available, so application protocols on top of `invocations_ws` are entirely under your control.
236+
237+
[starlette-ws]: https://www.starlette.io/websockets/
238+
239+
### Reference: configuration
240+
241+
| Environment variable | Default | Description |
242+
|---|---|---|
243+
| `WS_KEEPALIVE_INTERVAL` | unset (disabled) | Platform-injected WebSocket Ping interval, in seconds. `0` disables keep-alive. Surfaced on `app.config.ws_ping_interval` and wired into Hypercorn's `websocket_ping_interval` by `AgentServerHost`. |
244+
245+
### Reference: close codes
246+
247+
| Close code | Meaning |
248+
|---|---|
249+
| `1000` | Handler returned cleanly (normal close). |
250+
| `1011` | Handler raised an unhandled exception (mapped by the SDK). |
251+
| `4000`-`4999` | Application-defined codes (set by the handler via `await websocket.close(code=...)` — surfaced unchanged to the client). |
252+
185253
## Troubleshooting
186254

187255
### Reporting issues
@@ -196,6 +264,8 @@ Visit the [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/
196264
|---|---|
197265
| [simple_invoke_agent](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-invocations/samples/simple_invoke_agent/) | Minimal synchronous request-response |
198266
| [async_invoke_agent](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-invocations/samples/async_invoke_agent/) | Long-running operations with polling and cancellation |
267+
| [ws_invoke_agent](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-invocations/samples/ws_invoke_agent/) | Combined `POST /invocations` (HTTP) and `/invocations_ws` (WebSocket) host |
268+
| [ws_bidirectional_streaming_agent](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-invocations/samples/ws_bidirectional_streaming_agent/) | Full-duplex `/invocations_ws` agent: concurrent token streams + mid-flight cancel (relies on the SDK's WS protocol Ping/Pong keep-alive, not application-level heartbeats) |
199269

200270
## Contributing
201271

sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_constants.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,25 @@ class InvocationConstants:
2121
ATTR_SPAN_SESSION_ID = "azure.ai.agentserver.invocations.session_id"
2222
ATTR_SPAN_ERROR_CODE = "azure.ai.agentserver.invocations.error.code"
2323
ATTR_SPAN_ERROR_MESSAGE = "azure.ai.agentserver.invocations.error.message"
24+
25+
26+
class InvocationsWSConstants:
    """invocations_ws (WebSocket) protocol constants.

    Route path, close codes, and span attribute keys for the
    WebSocket endpoint hosted alongside the HTTP invocations protocol.

    No Ping/Pong keep-alive values are defined here.
    """

    # Route
    ROUTE_PATH = "/invocations_ws"

    # Close codes (RFC 6455)
    CLOSE_NORMAL = 1000  # handler returned cleanly
    CLOSE_INTERNAL_ERROR = 1011  # handler raised an unhandled exception

    # Span attribute keys
    ATTR_SPAN_SESSION_ID = "azure.ai.agentserver.invocations_ws.session_id"
    ATTR_SPAN_CLOSE_CODE = "azure.ai.agentserver.invocations_ws.close_code"
    ATTR_SPAN_DURATION_MS = "azure.ai.agentserver.invocations_ws.duration_ms"
    ATTR_SPAN_ERROR_CODE = "azure.ai.agentserver.invocations_ws.error.code"
    ATTR_SPAN_ERROR_MESSAGE = "azure.ai.agentserver.invocations_ws.error.message"

sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_invocation.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040

4141
from ._constants import InvocationConstants
42+
from ._invocation_ws import _WSHandlerMixin
4243

4344
logger = logging.getLogger("azure.ai.agentserver")
4445

@@ -147,13 +148,18 @@ def _sanitize_id(value: str, fallback: str) -> str:
147148
return value
148149

149150

150-
class InvocationAgentServerHost(AgentServerHost):
151+
class InvocationAgentServerHost(_WSHandlerMixin, AgentServerHost):
151152
"""Invocation protocol host for Azure AI Hosted Agents.
152153
153154
A :class:`~azure.ai.agentserver.core.AgentServerHost` subclass that adds
154155
the invocation protocol endpoints. Use the decorator methods to wire
155156
handler functions to the endpoints.
156157
158+
The same host object also exposes the ``invocations_ws`` (WebSocket)
159+
transport at ``/invocations_ws`` — register a handler with the
160+
:meth:`ws_handler` decorator. Multi-protocol agents share a single
161+
host, session, and process.
162+
157163
For multi-protocol agents, compose via cooperative inheritance::
158164
159165
class MyHost(InvocationAgentServerHost, ResponsesAgentServerHost):
@@ -162,13 +168,19 @@ class MyHost(InvocationAgentServerHost, ResponsesAgentServerHost):
162168
Usage::
163169
164170
from azure.ai.agentserver.invocations import InvocationAgentServerHost
171+
from starlette.websockets import WebSocket
165172
166173
app = InvocationAgentServerHost()
167174
168-
@app.invoke_handler
175+
@app.invoke_handler # POST /invocations
169176
async def handle(request):
170177
return JSONResponse({"ok": True})
171178
179+
@app.ws_handler # /invocations_ws
180+
async def ws(websocket: WebSocket) -> None:
181+
async for message in websocket.iter_text():
182+
await websocket.send_text(message)
183+
172184
app.run()
173185
174186
:param openapi_spec: Optional OpenAPI spec dict. When provided, the spec
@@ -189,8 +201,13 @@ def __init__(
189201
self._cancel_invocation_fn: Optional[Callable] = None
190202
self._openapi_spec = openapi_spec
191203

204+
# Initialise WS handler slots (no parameters — the keep-alive
205+
# interval lives on ``AgentConfig`` and is wired into Hypercorn
206+
# by ``AgentServerHost._build_hypercorn_config``).
207+
self._init_ws_state()
208+
192209
# Build invocation routes and pass to parent via routes kwarg
193-
invocation_routes = [
210+
invocation_routes: list[Any] = [
194211
Route(
195212
"/invocations/docs/openapi.json",
196213
self._get_openapi_spec_endpoint,

0 commit comments

Comments
 (0)