Skip to content

Commit 01ae50b

Browse files
Add optional client ping (#611)
* Add optional client ping * Add test * - shut down ping task group on close (was delaying shutdown) - add client side health data to `/mcp` display, deduplicate response counting - make /mcp non-blocking, tie health information - make client ping on by default * pathological cases/timeout handling --------- Co-authored-by: evalstate <1936278+evalstate@users.noreply.github.com>
1 parent 5658edb commit 01ae50b

8 files changed

Lines changed: 581 additions & 53 deletions

File tree

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,20 @@ mcp:
266266
267267
- To disable OAuth for a specific server , set `auth.oauth: false` for that server.
268268

269+
## MCP Ping (optional)
270+
271+
The MCP ping utility can be enabled by either peer (client or server). See the [Ping overview](https://modelcontextprotocol.io/specification/2025-11-25/basic/utilities/ping#overview).
272+
273+
Client-side pinging is configured per server (default: 30s interval, 3 missed pings):
274+
275+
```yaml
276+
mcp:
277+
servers:
278+
myserver:
279+
ping_interval_seconds: 30 # optional; <=0 disables
280+
max_missed_pings: 3 # optional; consecutive timeouts before marking failed
281+
```
282+
269283
## Workflows
270284

271285
### Chain

src/fast_agent/config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,12 @@ class MCPServerSettings(BaseModel):
213213
read_timeout_seconds: int | None = None
214214
"""The timeout in seconds for the session."""
215215

216+
ping_interval_seconds: int = 30
217+
"""Interval for MCP ping requests. Set <=0 to disable pinging."""
218+
219+
max_missed_pings: int = 3
220+
"""Number of consecutive missed ping responses before treating the connection as failed."""
221+
216222
http_timeout_seconds: int | None = None
217223
"""Overall HTTP timeout (seconds) for StreamableHTTP transport. Defaults to MCP SDK."""
218224

@@ -262,6 +268,16 @@ class MCPServerSettings(BaseModel):
262268

263269
implementation: Implementation | None = None
264270

271+
@field_validator("max_missed_pings", mode="before")
272+
@classmethod
273+
def _coerce_max_missed_pings(cls, value: Any) -> int:
274+
if isinstance(value, str):
275+
value = int(value.strip())
276+
value = int(value)
277+
if value <= 0:
278+
raise ValueError("max_missed_pings must be greater than zero.")
279+
return value
280+
265281
@model_validator(mode="before")
266282
@classmethod
267283
def validate_transport_inference(cls, values):

src/fast_agent/mcp/mcp_agent_client_session.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@
1818
CallToolRequestParams,
1919
CallToolResult,
2020
ClientRequest,
21+
EmptyResult,
2122
GetPromptRequest,
2223
GetPromptRequestParams,
2324
GetPromptResult,
2425
Implementation,
2526
ListRootsResult,
27+
PingRequest,
2628
ReadResourceRequest,
2729
ReadResourceRequestParams,
2830
ReadResourceResult,
@@ -224,6 +226,13 @@ async def send_request(
224226
) -> ReceiveResultT:
225227
logger.debug("send_request: request=", data=request.model_dump())
226228
request_id = getattr(self, "_request_id", None)
229+
is_ping_request = self._is_ping_request(request)
230+
if (
231+
is_ping_request
232+
and request_id is not None
233+
and self._transport_metrics is not None
234+
):
235+
self._transport_metrics.register_ping_request(request_id)
227236
try:
228237
result = await super().send_request(
229238
request=request,
@@ -237,8 +246,20 @@ async def send_request(
237246
data=result.model_dump() if result is not None else "no response returned",
238247
)
239248
self._attach_transport_channel(request_id, result)
249+
if (
250+
is_ping_request
251+
and request_id is not None
252+
and self._transport_metrics is not None
253+
):
254+
self._transport_metrics.discard_ping_request(request_id)
240255
return result
241256
except Exception as e:
257+
if (
258+
is_ping_request
259+
and request_id is not None
260+
and self._transport_metrics is not None
261+
):
262+
self._transport_metrics.discard_ping_request(request_id)
242263
from anyio import ClosedResourceError
243264

244265
from fast_agent.core.exceptions import ServerSessionTerminatedError
@@ -262,6 +283,15 @@ async def send_request(
262283
logger.error(f"send_request failed: {str(e)}")
263284
raise
264285

286+
@staticmethod
287+
def _is_ping_request(request: ClientRequest) -> bool:
288+
root = getattr(request, "root", None)
289+
method = getattr(root, "method", None)
290+
if not isinstance(method, str):
291+
return False
292+
method_lower = method.lower()
293+
return method_lower == "ping" or method_lower.endswith("/ping") or method_lower.endswith(".ping")
294+
265295
def _is_session_terminated_error(self, exc: Exception) -> bool:
266296
"""Check if exception is a session terminated error (code 32600 from 404)."""
267297
from mcp.shared.exceptions import McpError
@@ -365,6 +395,15 @@ async def call_tool(
365395
progress_callback=progress_callback,
366396
)
367397

398+
async def ping(self, read_timeout_seconds: timedelta | None = None) -> EmptyResult:
399+
"""Send a ping request to check server liveness."""
400+
request = PingRequest(method="ping")
401+
return await self.send_request(
402+
ClientRequest(request),
403+
EmptyResult,
404+
request_read_timeout_seconds=read_timeout_seconds,
405+
)
406+
368407
async def read_resource(
369408
self, uri: AnyUrl | str, _meta: dict | None = None, **kwargs
370409
) -> ReadResourceResult:

src/fast_agent/mcp/mcp_aggregator.py

Lines changed: 100 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ class ServerStatus(BaseModel):
115115
transport_channels: TransportSnapshot | None = None
116116
skybridge: SkybridgeServerConfig | None = None
117117
reconnect_count: int = 0
118+
ping_interval_seconds: int | None = None
119+
ping_max_missed: int | None = None
120+
ping_ok_count: int | None = None
121+
ping_fail_count: int | None = None
122+
ping_consecutive_failures: int | None = None
123+
ping_last_ok_at: datetime | None = None
124+
ping_last_fail_at: datetime | None = None
125+
ping_last_error: str | None = None
126+
ping_activity_buckets: list[str] | None = None
127+
ping_activity_bucket_seconds: int | None = None
128+
ping_activity_bucket_count: int | None = None
118129

119130
model_config = ConfigDict(arbitrary_types_allowed=True)
120131

@@ -997,49 +1008,88 @@ async def collect_server_status(self) -> dict[str, ServerStatus]:
9971008
server_conn = None
9981009
transport: str | None = None
9991010
transport_snapshot: TransportSnapshot | None = None
1011+
ping_interval_seconds: int | None = None
1012+
ping_max_missed: int | None = None
1013+
ping_ok_count: int | None = None
1014+
ping_fail_count: int | None = None
1015+
ping_consecutive_failures: int | None = None
1016+
ping_last_ok_at: datetime | None = None
1017+
ping_last_fail_at: datetime | None = None
1018+
ping_last_error: str | None = None
1019+
ping_activity_buckets: list[str] | None = None
1020+
ping_activity_bucket_seconds: int | None = None
1021+
ping_activity_bucket_count: int | None = None
10001022

10011023
manager = getattr(self, "_persistent_connection_manager", None)
10021024
if self.connection_persistence and manager is not None:
10031025
try:
1004-
server_conn = await manager.get_server(
1005-
server_name,
1006-
client_session_factory=self._create_session_factory(server_name),
1007-
)
1008-
implementation = server_conn.server_implementation
1009-
if implementation is not None:
1010-
implementation_name = implementation.name
1011-
implementation_version = implementation.version
1012-
capabilities = server_conn.server_capabilities
1013-
client_capabilities = server_conn.client_capabilities
1014-
session = server_conn.session
1015-
client_info = getattr(session, "client_info", None) if session else None
1016-
if client_info:
1017-
client_info_name = getattr(client_info, "name", None)
1018-
client_info_version = getattr(client_info, "version", None)
1019-
is_connected = server_conn.is_healthy()
1020-
error_message = server_conn._error_message
1021-
instructions_available = server_conn.server_instructions_available
1022-
instructions_enabled = server_conn.server_instructions_enabled
1023-
instructions_included = bool(server_conn.server_instructions)
1024-
server_cfg = server_conn.server_config
1025-
if session:
1026-
elicitation_mode = session.effective_elicitation_mode
1027-
session_id = server_conn.session_id
1028-
if not session_id and server_conn._get_session_id_cb:
1026+
async with manager._lock:
1027+
server_conn = manager.running_servers.get(server_name)
1028+
if server_conn is None:
1029+
is_connected = False
1030+
else:
1031+
implementation = server_conn.server_implementation
1032+
if implementation is not None:
1033+
implementation_name = implementation.name
1034+
implementation_version = implementation.version
1035+
capabilities = server_conn.server_capabilities
1036+
client_capabilities = server_conn.client_capabilities
1037+
session = server_conn.session
1038+
client_info = getattr(session, "client_info", None) if session else None
1039+
if client_info:
1040+
client_info_name = getattr(client_info, "name", None)
1041+
client_info_version = getattr(client_info, "version", None)
1042+
if server_conn._initialized_event.is_set():
1043+
is_connected = server_conn.is_healthy()
1044+
else:
1045+
is_connected = False
1046+
error_message = error_message or "initializing..."
1047+
error_message = error_message or server_conn._error_message
1048+
instructions_available = server_conn.server_instructions_available
1049+
instructions_enabled = server_conn.server_instructions_enabled
1050+
instructions_included = bool(server_conn.server_instructions)
1051+
server_cfg = server_conn.server_config
1052+
ping_interval_seconds = server_cfg.ping_interval_seconds
1053+
ping_max_missed = server_cfg.max_missed_pings
1054+
ping_ok_count = server_conn._ping_ok_count
1055+
ping_fail_count = server_conn._ping_fail_count
1056+
ping_consecutive_failures = server_conn._ping_consecutive_failures
1057+
ping_last_ok_at = server_conn._ping_last_ok_at
1058+
ping_last_fail_at = server_conn._ping_last_fail_at
1059+
ping_last_error = server_conn._ping_last_error
1060+
if session:
1061+
elicitation_mode = session.effective_elicitation_mode
1062+
session_id = server_conn.session_id
1063+
if not session_id and server_conn._get_session_id_cb:
1064+
try:
1065+
session_id = server_conn._get_session_id_cb() # type: ignore[attr-defined]
1066+
except Exception:
1067+
session_id = None
1068+
metrics = server_conn.transport_metrics
1069+
if metrics is not None:
10291070
try:
1030-
session_id = server_conn._get_session_id_cb() # type: ignore[attr-defined]
1071+
transport_snapshot = metrics.snapshot()
10311072
except Exception:
1032-
session_id = None
1033-
metrics = server_conn.transport_metrics
1034-
if metrics is not None:
1035-
try:
1036-
transport_snapshot = metrics.snapshot()
1037-
except Exception:
1038-
logger.debug(
1039-
"Failed to snapshot transport metrics for server '%s'",
1040-
server_name,
1041-
exc_info=True,
1042-
)
1073+
logger.debug(
1074+
"Failed to snapshot transport metrics for server '%s'",
1075+
server_name,
1076+
exc_info=True,
1077+
)
1078+
bucket_seconds = (
1079+
transport_snapshot.activity_bucket_seconds
1080+
if transport_snapshot and transport_snapshot.activity_bucket_seconds
1081+
else 30
1082+
)
1083+
bucket_count = (
1084+
transport_snapshot.activity_bucket_count
1085+
if transport_snapshot and transport_snapshot.activity_bucket_count
1086+
else 20
1087+
)
1088+
ping_activity_buckets = server_conn.build_ping_activity_buckets(
1089+
bucket_seconds, bucket_count
1090+
)
1091+
ping_activity_bucket_seconds = bucket_seconds
1092+
ping_activity_bucket_count = bucket_count
10431093
except Exception as exc:
10441094
logger.debug(
10451095
f"Failed to collect status for server '{server_name}'",
@@ -1068,6 +1118,8 @@ async def collect_server_status(self) -> dict[str, ServerStatus]:
10681118
elicitation_mode = (
10691119
getattr(elicitation, "mode", None) if elicitation else elicitation_mode
10701120
)
1121+
ping_interval_seconds = ping_interval_seconds or server_cfg.ping_interval_seconds
1122+
ping_max_missed = ping_max_missed or server_cfg.max_missed_pings
10711123
sampling_cfg = server_cfg.sampling
10721124
spoofing_enabled = server_cfg.implementation is not None
10731125
if implementation_name is None and server_cfg.implementation is not None:
@@ -1123,6 +1175,17 @@ async def collect_server_status(self) -> dict[str, ServerStatus]:
11231175
transport_channels=transport_snapshot,
11241176
skybridge=self._skybridge_configs.get(server_name),
11251177
reconnect_count=reconnect_count,
1178+
ping_interval_seconds=ping_interval_seconds,
1179+
ping_max_missed=ping_max_missed,
1180+
ping_ok_count=ping_ok_count,
1181+
ping_fail_count=ping_fail_count,
1182+
ping_consecutive_failures=ping_consecutive_failures,
1183+
ping_last_ok_at=ping_last_ok_at,
1184+
ping_last_fail_at=ping_last_fail_at,
1185+
ping_last_error=ping_last_error,
1186+
ping_activity_buckets=ping_activity_buckets,
1187+
ping_activity_bucket_seconds=ping_activity_bucket_seconds,
1188+
ping_activity_bucket_count=ping_activity_bucket_count,
11261189
)
11271190

11281191
return status_map

0 commit comments

Comments
 (0)