Skip to content

Commit cef5444

Browse files
authored
Report MCP error codes with server attribution (#29969)
## Why MCP error-code telemetry special-cased Codex Apps: its reported error codes were retained, while codes from every other MCP server were replaced with `unknown`. Error reporting should behave consistently for every MCP server. The server name already identifies where an error came from, so telemetry does not need a separate Codex Apps classification. This follows up on [#28976](#28976), which introduced MCP error-code telemetry. ## What changed - Add the MCP server name to call, duration, and error metrics. - Retain bounded, sanitized tool error codes from every MCP server. - Remove `McpErrorCodeSource` and the Codex Apps ownership lookup from telemetry collection. - Use the same metric-tagging path for blocked, rejected, and executed MCP calls. ## Test plan - Verify the complete metric tag set includes the sanitized MCP server name. - Verify error codes from ordinary MCP servers are retained, bounded, and sanitized. - Preserve coverage for request failures, tool-result failures, nested auth failures, and span attributes.
1 parent a747713 commit cef5444

4 files changed

Lines changed: 101 additions & 106 deletions

File tree

codex-rs/core/src/mcp_tool_call.rs

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,7 @@ use url::Url;
9292

9393
mod telemetry;
9494

95-
use telemetry::MCP_CALL_COUNT_METRIC;
9695
use telemetry::McpCallMetricOutcome;
97-
use telemetry::McpErrorCodeSource;
9896
use telemetry::emit_mcp_call_metrics;
9997
use telemetry::mcp_call_metric_outcome;
10098
use telemetry::record_mcp_call_outcome_span_telemetry;
@@ -182,6 +180,13 @@ pub(crate) async fn handle_mcp_tool_call(
182180
.await
183181
};
184182

183+
let connector_id = metadata
184+
.as_ref()
185+
.and_then(|metadata| metadata.connector_id.clone());
186+
let connector_name = metadata
187+
.as_ref()
188+
.and_then(|metadata| metadata.connector_name.clone());
189+
185190
if server == CODEX_APPS_MCP_SERVER_NAME && !app_tool_policy.enabled {
186191
let result = notify_mcp_tool_call_skip(
187192
sess.as_ref(),
@@ -194,24 +199,22 @@ pub(crate) async fn handle_mcp_tool_call(
194199
)
195200
.await;
196201
let status = if result.is_ok() { "ok" } else { "error" };
197-
turn_context.session_telemetry.counter(
198-
MCP_CALL_COUNT_METRIC,
199-
/*inc*/ 1,
200-
&[("status", status)],
202+
let outcome = McpCallMetricOutcome::from_status(status);
203+
emit_mcp_call_metrics(
204+
turn_context.as_ref(),
205+
&outcome,
206+
&server,
207+
&tool_name,
208+
connector_id.as_deref(),
209+
connector_name.as_deref(),
210+
/*duration*/ None,
201211
);
202212
return HandledMcpToolCall {
203213
result: CallToolResult::from_result(result),
204214
tool_input: arguments_value
205215
.unwrap_or_else(|| JsonValue::Object(serde_json::Map::new())),
206216
};
207217
}
208-
let connector_id = metadata
209-
.as_ref()
210-
.and_then(|metadata| metadata.connector_id.clone());
211-
let connector_name = metadata
212-
.as_ref()
213-
.and_then(|metadata| metadata.connector_name.clone());
214-
215218
notify_mcp_tool_call_started(
216219
sess.as_ref(),
217220
turn_context.as_ref(),
@@ -279,6 +282,7 @@ pub(crate) async fn handle_mcp_tool_call(
279282
emit_mcp_call_metrics(
280283
turn_context.as_ref(),
281284
&outcome,
285+
&server,
282286
&tool_name,
283287
connector_id.as_deref(),
284288
connector_name.as_deref(),
@@ -348,17 +352,11 @@ async fn handle_approved_mcp_tool_call(
348352
let arguments_value = invocation.arguments.clone();
349353
let connector_id = metadata.and_then(|metadata| metadata.connector_id.as_deref());
350354
let connector_name = metadata.and_then(|metadata| metadata.connector_name.as_deref());
351-
let (server_origin, error_code_source) = {
355+
let server_origin = {
352356
let mcp_connection_manager = sess.services.mcp_connection_manager.load_full();
353-
let server_origin = mcp_connection_manager
357+
mcp_connection_manager
354358
.server_origin(&server)
355-
.map(str::to_string);
356-
let error_code_source = if mcp_connection_manager.is_host_owned_codex_apps_server(&server) {
357-
McpErrorCodeSource::HostedPluginService
358-
} else {
359-
McpErrorCodeSource::Untrusted
360-
};
361-
(server_origin, error_code_source)
359+
.map(str::to_string)
362360
};
363361

364362
let start = Instant::now();
@@ -392,7 +390,7 @@ async fn handle_approved_mcp_tool_call(
392390
.await
393391
}
394392
.await;
395-
record_mcp_result_span_telemetry(&Span::current(), &result, error_code_source);
393+
record_mcp_result_span_telemetry(&Span::current(), &result);
396394
result
397395
}
398396
.instrument(mcp_tool_call_span(
@@ -424,10 +422,11 @@ async fn handle_approved_mcp_tool_call(
424422
.await;
425423
maybe_track_codex_app_used(sess, turn_context, &server, &tool_name).await;
426424

427-
let outcome = mcp_call_metric_outcome(&result, error_code_source);
425+
let outcome = mcp_call_metric_outcome(&result);
428426
emit_mcp_call_metrics(
429427
turn_context,
430428
&outcome,
429+
&server,
431430
&tool_name,
432431
connector_id,
433432
connector_name,
@@ -501,12 +500,8 @@ fn record_server_fields(span: &Span, url: Option<&str>) {
501500
}
502501
}
503502

504-
fn record_mcp_result_span_telemetry(
505-
span: &Span,
506-
result: &Result<CallToolResult, String>,
507-
error_code_source: McpErrorCodeSource,
508-
) {
509-
record_mcp_call_outcome_span_telemetry(span, result, error_code_source);
503+
fn record_mcp_result_span_telemetry(span: &Span, result: &Result<CallToolResult, String>) {
504+
record_mcp_call_outcome_span_telemetry(span, result);
510505

511506
let Some(span_telemetry) = result
512507
.as_ref()

codex-rs/core/src/mcp_tool_call/telemetry.rs

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use codex_protocol::mcp::CallToolResult;
77
use serde_json::Value as JsonValue;
88
use tracing::Span;
99

10-
pub(super) const MCP_CALL_COUNT_METRIC: &str = "codex.mcp.call";
10+
const MCP_CALL_COUNT_METRIC: &str = "codex.mcp.call";
1111
const MCP_CALL_DURATION_METRIC: &str = "codex.mcp.call.duration_ms";
1212
const MCP_CALL_ERROR_COUNT_METRIC: &str = "codex.mcp.call.error";
1313
// No CallToolResult was received. This includes request setup, transport, timeout, protocol, and
@@ -37,21 +37,22 @@ impl McpCallMetricOutcome {
3737
}
3838
}
3939

40-
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
41-
pub(super) enum McpErrorCodeSource {
42-
HostedPluginService,
43-
Untrusted,
44-
}
45-
4640
pub(super) fn emit_mcp_call_metrics(
4741
turn_context: &TurnContext,
4842
outcome: &McpCallMetricOutcome,
43+
server_name: &str,
4944
tool_name: &str,
5045
connector_id: Option<&str>,
5146
connector_name: Option<&str>,
5247
duration: Option<Duration>,
5348
) {
54-
let tags = mcp_call_metric_tags(outcome.status, tool_name, connector_id, connector_name);
49+
let tags = mcp_call_metric_tags(
50+
outcome.status,
51+
server_name,
52+
tool_name,
53+
connector_id,
54+
connector_name,
55+
);
5556
let tag_refs: Vec<(&str, &str)> = tags
5657
.iter()
5758
.map(|(key, value)| (*key, value.as_str()))
@@ -87,12 +88,14 @@ pub(super) fn emit_mcp_call_metrics(
8788

8889
fn mcp_call_metric_tags(
8990
status: &str,
91+
server_name: &str,
9092
tool_name: &str,
9193
connector_id: Option<&str>,
9294
connector_name: Option<&str>,
9395
) -> Vec<(&'static str, String)> {
9496
let mut tags = vec![
9597
("status", sanitize_metric_tag_value(status)),
98+
("server", sanitize_metric_tag_value(server_name)),
9699
("tool", sanitize_metric_tag_value(tool_name)),
97100
];
98101
if let Some(connector_id) = connector_id.filter(|connector_id| !connector_id.is_empty()) {
@@ -107,40 +110,35 @@ fn mcp_call_metric_tags(
107110

108111
pub(super) fn mcp_call_metric_outcome(
109112
result: &Result<CallToolResult, String>,
110-
error_code_source: McpErrorCodeSource,
111113
) -> McpCallMetricOutcome {
112114
match result {
113115
Ok(result) if result.is_error.unwrap_or(false) => {
114-
let error_code = if error_code_source == McpErrorCodeSource::HostedPluginService {
115-
result
116-
.structured_content
117-
.as_ref()
118-
.and_then(JsonValue::as_object)
119-
.and_then(|structured_content| structured_content.get("error_code"))
120-
.and_then(JsonValue::as_str)
121-
.filter(|error_code| !error_code.is_empty())
122-
.or_else(|| {
123-
result
124-
.meta
125-
.as_ref()
126-
.and_then(JsonValue::as_object)
127-
.and_then(|meta| meta.get(MCP_TOOL_CODEX_APPS_META_KEY))
128-
.and_then(JsonValue::as_object)
129-
.and_then(|codex_apps| codex_apps.get("connector_auth_failure"))
130-
.and_then(JsonValue::as_object)
131-
.filter(|auth_failure| {
132-
auth_failure
133-
.get("is_auth_failure")
134-
.and_then(JsonValue::as_bool)
135-
== Some(true)
136-
})
137-
.and_then(|auth_failure| auth_failure.get("error_code"))
138-
.and_then(JsonValue::as_str)
139-
.filter(|error_code| !error_code.is_empty())
140-
})
141-
} else {
142-
None
143-
};
116+
let error_code = result
117+
.structured_content
118+
.as_ref()
119+
.and_then(JsonValue::as_object)
120+
.and_then(|structured_content| structured_content.get("error_code"))
121+
.and_then(JsonValue::as_str)
122+
.filter(|error_code| !error_code.is_empty())
123+
.or_else(|| {
124+
result
125+
.meta
126+
.as_ref()
127+
.and_then(JsonValue::as_object)
128+
.and_then(|meta| meta.get(MCP_TOOL_CODEX_APPS_META_KEY))
129+
.and_then(JsonValue::as_object)
130+
.and_then(|codex_apps| codex_apps.get("connector_auth_failure"))
131+
.and_then(JsonValue::as_object)
132+
.filter(|auth_failure| {
133+
auth_failure
134+
.get("is_auth_failure")
135+
.and_then(JsonValue::as_bool)
136+
== Some(true)
137+
})
138+
.and_then(|auth_failure| auth_failure.get("error_code"))
139+
.and_then(JsonValue::as_str)
140+
.filter(|error_code| !error_code.is_empty())
141+
});
144142
let error_code: String = error_code
145143
.unwrap_or(MCP_CALL_ERROR_CODE_UNKNOWN)
146144
.chars()
@@ -164,9 +162,8 @@ pub(super) fn mcp_call_metric_outcome(
164162
pub(super) fn record_mcp_call_outcome_span_telemetry(
165163
span: &Span,
166164
result: &Result<CallToolResult, String>,
167-
error_code_source: McpErrorCodeSource,
168165
) {
169-
let outcome = mcp_call_metric_outcome(result, error_code_source);
166+
let outcome = mcp_call_metric_outcome(result);
170167
let (Some(error_type), Some(error_code)) = (outcome.error_type, outcome.error_code) else {
171168
return;
172169
};

codex-rs/core/src/mcp_tool_call/telemetry_tests.rs

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,39 +14,50 @@ fn metric_call_tool_result(
1414
}
1515

1616
#[test]
17-
fn mcp_call_metric_outcome_distinguishes_request_and_tool_errors() {
17+
fn mcp_call_metric_tags_include_server_name() {
1818
assert_eq!(
19-
mcp_call_metric_outcome(
20-
&Ok(metric_call_tool_result(
21-
/*is_error*/ false, /*structured_content*/ None,
22-
)),
23-
McpErrorCodeSource::HostedPluginService,
19+
mcp_call_metric_tags(
20+
"error",
21+
"docs server",
22+
"search docs",
23+
Some("connector/docs"),
24+
Some("Docs connector"),
2425
),
26+
vec![
27+
("status", "error".to_string()),
28+
("server", "docs_server".to_string()),
29+
("tool", "search_docs".to_string()),
30+
("connector_id", "connector/docs".to_string()),
31+
("connector_name", "Docs_connector".to_string()),
32+
],
33+
);
34+
}
35+
36+
#[test]
37+
fn mcp_call_metric_outcome_distinguishes_request_and_tool_errors() {
38+
assert_eq!(
39+
mcp_call_metric_outcome(&Ok(metric_call_tool_result(
40+
/*is_error*/ false, /*structured_content*/ None,
41+
)),),
2542
McpCallMetricOutcome {
2643
status: "ok",
2744
error_type: None,
2845
error_code: None,
2946
}
3047
);
3148
assert_eq!(
32-
mcp_call_metric_outcome(
33-
&Ok(metric_call_tool_result(
34-
/*is_error*/ true,
35-
Some(serde_json::json!({"error_code": "RATE_LIMITED"})),
36-
)),
37-
McpErrorCodeSource::HostedPluginService,
38-
),
49+
mcp_call_metric_outcome(&Ok(metric_call_tool_result(
50+
/*is_error*/ true,
51+
Some(serde_json::json!({"error_code": "RATE_LIMITED"})),
52+
)),),
3953
McpCallMetricOutcome {
4054
status: "error",
4155
error_type: Some(MCP_CALL_ERROR_TYPE_TOOL_RESULT),
4256
error_code: Some("RATE_LIMITED".to_string()),
4357
}
4458
);
4559
assert_eq!(
46-
mcp_call_metric_outcome(
47-
&Err("connection closed".to_string()),
48-
McpErrorCodeSource::HostedPluginService,
49-
),
60+
mcp_call_metric_outcome(&Err("connection closed".to_string())),
5061
McpCallMetricOutcome {
5162
status: "error",
5263
error_type: Some(MCP_CALL_ERROR_TYPE_MCP_REQUEST),
@@ -56,24 +67,24 @@ fn mcp_call_metric_outcome_distinguishes_request_and_tool_errors() {
5667
}
5768

5869
#[test]
59-
fn mcp_call_metric_outcome_ignores_untrusted_tool_error_codes() {
70+
fn mcp_call_metric_outcome_reports_server_tool_error_codes() {
6071
let result = Ok(metric_call_tool_result(
6172
/*is_error*/ true,
6273
Some(serde_json::json!({"error_code": "arbitrary-user-value"})),
6374
));
6475

6576
assert_eq!(
66-
mcp_call_metric_outcome(&result, McpErrorCodeSource::Untrusted),
77+
mcp_call_metric_outcome(&result),
6778
McpCallMetricOutcome {
6879
status: "error",
6980
error_type: Some(MCP_CALL_ERROR_TYPE_TOOL_RESULT),
70-
error_code: Some(MCP_CALL_ERROR_CODE_UNKNOWN.to_string()),
81+
error_code: Some("arbitrary-user-value".to_string()),
7182
}
7283
);
7384
}
7485

7586
#[test]
76-
fn mcp_call_metric_outcome_reads_hosted_auth_error_code_from_meta() {
87+
fn mcp_call_metric_outcome_reads_auth_error_code_from_meta() {
7788
let result = CallToolResult {
7889
content: Vec::new(),
7990
structured_content: None,
@@ -89,7 +100,7 @@ fn mcp_call_metric_outcome_reads_hosted_auth_error_code_from_meta() {
89100
};
90101

91102
assert_eq!(
92-
mcp_call_metric_outcome(&Ok(result), McpErrorCodeSource::HostedPluginService),
103+
mcp_call_metric_outcome(&Ok(result)),
93104
McpCallMetricOutcome {
94105
status: "error",
95106
error_type: Some(MCP_CALL_ERROR_TYPE_TOOL_RESULT),
@@ -99,15 +110,15 @@ fn mcp_call_metric_outcome_reads_hosted_auth_error_code_from_meta() {
99110
}
100111

101112
#[test]
102-
fn mcp_call_metric_outcome_bounds_and_sanitizes_hosted_error_code() {
113+
fn mcp_call_metric_outcome_bounds_and_sanitizes_error_code() {
103114
let raw_error_code = format!("BAD CODE {}", "x".repeat(300));
104115
let result = Ok(metric_call_tool_result(
105116
/*is_error*/ true,
106117
Some(serde_json::json!({"error_code": raw_error_code})),
107118
));
108119

109120
assert_eq!(
110-
mcp_call_metric_outcome(&result, McpErrorCodeSource::HostedPluginService),
121+
mcp_call_metric_outcome(&result),
111122
McpCallMetricOutcome {
112123
status: "error",
113124
error_type: Some(MCP_CALL_ERROR_TYPE_TOOL_RESULT),

0 commit comments

Comments
 (0)