Skip to content

Commit ca3f63c

Browse files
lramos15 and Copilot authored
Cleanup auto with better telemetry + newer API shape (#4374)
* Cleanup auto with better telemetry + newer API shape * Update src/platform/endpoint/node/automodeService.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix tests --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 998fef0 commit ca3f63c

2 files changed

Lines changed: 39 additions & 42 deletions

File tree

src/platform/endpoint/node/automodeService.ts

Lines changed: 35 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ export class AutomodeService extends Disposable implements IAutomodeService {
194194
throw new Error('No auto mode endpoints provided.');
195195
}
196196

197-
const conversationId = getConversationId(chatRequest);
197+
const conversationId = chatRequest?.sessionResource?.toString() ?? chatRequest?.sessionId ?? 'unknown';
198198
const entry = this._autoModelCache.get(conversationId);
199199

200200
// Acquire token bank: reuse from cache or take from reserve pool
@@ -212,34 +212,63 @@ export class AutomodeService extends Disposable implements IAutomodeService {
212212

213213
let selectedModel: IChatEndpoint | undefined;
214214
let lastRoutedPrompt = entry?.lastRoutedPrompt;
215+
let routerFallbackReason: string | undefined;
215216

216217
// Try router-based model selection (skip for vision requests to avoid unnecessary latency)
217-
if (!hasImage(chatRequest) && this._isRouterEnabled(chatRequest)) {
218+
if (hasImage(chatRequest)) {
219+
routerFallbackReason = 'hasImage';
220+
} else if (this._isRouterEnabled(chatRequest)) {
218221
const prompt = chatRequest?.prompt?.trim();
219222
// Only route when the prompt has changed since the last decision, to avoid
220223
// redundant calls during tool-calling iterations with the same prompt.
221-
if (prompt?.length && (!entry || entry.lastRoutedPrompt !== prompt)) {
224+
if (!prompt?.length) {
225+
routerFallbackReason = 'emptyPrompt';
226+
} else if (entry && entry.lastRoutedPrompt === prompt) {
227+
// Prompt hasn't changed since the last router decision — skip the
228+
// router call but fall through to the endpoint reuse/recreate path
229+
// so the endpoint is rebuilt if the session token has changed.
230+
// Router fallback reason isn't set here because we don't want telemetry for this case
231+
} else {
222232
try {
223233
const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models);
224-
if (entry?.endpoint) {
234+
if (!result.candidate_models.length) {
235+
routerFallbackReason = 'emptyCandidateList';
236+
} else if (entry?.endpoint) {
225237
// Prefer a same-provider model from the router's candidate list
226238
selectedModel = this._findSameProviderModel(entry.endpoint.modelProvider, result.candidate_models, knownEndpoints);
227239
}
228-
selectedModel ??= knownEndpoints.find(e => e.model === result.chosen_model);
240+
if (!routerFallbackReason) {
241+
selectedModel ??= knownEndpoints.find(e => e.model === result.candidate_models[0]);
242+
}
229243
if (selectedModel) {
230244
lastRoutedPrompt = prompt;
231245
if (result.sticky_override) {
232-
this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.chosen_model}, actual_model=${selectedModel.model}`);
246+
this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.candidate_models[0]}, actual_model=${selectedModel.model}`);
233247
}
248+
} else {
249+
routerFallbackReason = 'noMatchingEndpoint';
234250
}
235251
} catch (e) {
236252
this._logService.error(`Failed to get routed model for conversation ${conversationId}:`, (e as Error).message);
253+
routerFallbackReason = 'routerError';
237254
}
238255
}
239256
}
240257

241258
// Default model selection when router was skipped or failed
242259
if (!selectedModel) {
260+
if (routerFallbackReason) {
261+
/* __GDPR__
262+
"automode.routerFallback" : {
263+
"owner": "lramos15",
264+
"comment": "Reports when the auto mode router is skipped or fails and falls back to default model selection",
265+
"reason": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "The reason the router was skipped or failed (hasImage, noMatchingEndpoint, routerError)" }
266+
}
267+
*/
268+
this._telemetryService.sendMSFTTelemetryEvent('automode.routerFallback', {
269+
reason: routerFallbackReason,
270+
});
271+
}
243272
// Pick a model: prefer same provider when refreshing, otherwise first available
244273
if (entry?.endpoint) {
245274
selectedModel = this._findSameProviderModel(entry.endpoint.modelProvider, token.available_models, knownEndpoints);
@@ -336,18 +365,6 @@ export class AutomodeService extends Disposable implements IAutomodeService {
336365
}
337366
}
338367

339-
/**
340-
* Get the conversation ID from the chat request. This is representative of a single chat thread
341-
* @param chatRequest The chat request object.
342-
* @returns The conversation ID or 'unknown' if not available.
343-
*/
344-
function getConversationId(chatRequest: ChatRequest | undefined): string {
345-
if (!chatRequest) {
346-
return 'unknown';
347-
}
348-
return chatRequest?.sessionId || 'unknown';
349-
}
350-
351368
function hasImage(chatRequest: ChatRequest | undefined): boolean {
352369
if (!chatRequest || !chatRequest.references) {
353370
return false;

src/platform/endpoint/node/routerDecisionFetcher.ts

Lines changed: 4 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
import { RequestType } from '@vscode/copilot-api';
77
import { Codicon } from '../../../util/vs/base/common/codicons';
88
import { IAuthenticationService } from '../../authentication/common/authentication';
9-
import { IValidator, vArray, vBoolean, vEnum, vNumber, vObj, vRequired, vString } from '../../configuration/common/validator';
109
import { ILogService } from '../../log/common/logService';
1110
import { Response } from '../../networking/common/fetcherService';
1211
import { IRequestLogger, LoggedRequestKind } from '../../requestLogger/node/requestLogger';
@@ -17,7 +16,6 @@ export interface RouterDecisionResponse {
1716
predicted_label: 'needs_reasoning' | 'no_reasoning';
1817
confidence: number;
1918
latency_ms: number;
20-
chosen_model: string;
2119
candidate_models: string[];
2220
scores: {
2321
needs_reasoning: number;
@@ -26,18 +24,6 @@ export interface RouterDecisionResponse {
2624
sticky_override?: boolean;
2725
}
2826

29-
const routerDecisionResponseValidator: IValidator<RouterDecisionResponse> = vObj({
30-
predicted_label: vRequired(vEnum('needs_reasoning', 'no_reasoning')),
31-
confidence: vRequired(vNumber()),
32-
latency_ms: vRequired(vNumber()),
33-
chosen_model: vRequired(vString()),
34-
candidate_models: vRequired(vArray(vString())),
35-
scores: vRequired(vObj({
36-
needs_reasoning: vRequired(vNumber()),
37-
no_reasoning: vRequired(vNumber())
38-
})),
39-
sticky_override: vBoolean()
40-
});
4127

4228
/**
4329
* Fetches routing decisions from a classification API to determine which model should handle a query.
@@ -75,12 +61,9 @@ export class RouterDecisionFetcher {
7561
}
7662

7763
const text = await response.text();
78-
const { content: result, error: validationError } = routerDecisionResponseValidator.validate(JSON.parse(text));
79-
if (validationError) {
80-
throw new Error(`Invalid router decision response: ${validationError.message}`);
81-
}
64+
const result: RouterDecisionResponse = JSON.parse(text);
8265
const e2eLatencyMs = Date.now() - startTime;
83-
this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, model: ${result.chosen_model} (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false})`);
66+
this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false})`);
8467

8568
this._requestLogger.addEntry({
8669
type: LoggedRequestKind.MarkdownContentRequest,
@@ -91,10 +74,9 @@ export class RouterDecisionFetcher {
9174
`# Auto Mode Router Decision`,
9275
`## Result`,
9376
`- **Predicted Label**: ${result.predicted_label}`,
94-
`- **Chosen Model**: ${result.chosen_model}`,
9577
`- **Confidence**: ${(result.confidence * 100).toFixed(1)}%`,
96-
`- **Sticky Override**: ${result.sticky_override ?? false}`,
97-
`## Scores`,
78+
`- **Sticky Override**: ${result.sticky_override ?? false}`,
79+
`## Scores`,
9880
`- **Needs Reasoning**: ${(result.scores.needs_reasoning * 100).toFixed(1)}%`,
9981
`- **No Reasoning**: ${(result.scores.no_reasoning * 100).toFixed(1)}%`,
10082
`## Latency`,
@@ -112,7 +94,6 @@ export class RouterDecisionFetcher {
11294
"owner": "lramos15",
11395
"comment": "Reports the routing decision made by the auto mode router API",
11496
"predictedLabel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The predicted classification label (needs_reasoning or no_reasoning)" },
115-
"chosenModel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model selected by the router" },
11697
"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The confidence score of the routing decision" },
11798
"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The latency of the router API call in milliseconds" },
11899
"e2eLatencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The end-to-end latency of the router request in milliseconds, including network overhead" }
@@ -121,7 +102,6 @@ export class RouterDecisionFetcher {
121102
this._telemetryService.sendMSFTTelemetryEvent('automode.routerDecision',
122103
{
123104
predictedLabel: result.predicted_label,
124-
chosenModel: result.chosen_model,
125105
},
126106
{
127107
confidence: result.confidence,

0 commit comments

Comments (0)