Skip to content

Commit c4333ce

Browse files
committed
fix(voice): read rootSpanContext live in AudioRecognition
AudioRecognition snapshotted rootSpanContext at construction time, so any mutation of session.rootSpanContext after _startSession (e.g. in an Agent.onEnter hook) was invisible to user_turn spans. Store a live reference to the AgentSession instead, and read rootSpanContext fresh each time a user_turn span is started or its context is derived. Fixes #924.
1 parent 389a050 commit c4333ce

2 files changed

Lines changed: 8 additions & 8 deletions

File tree

agents/src/voice/agent_activity.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,7 @@ export class AgentActivity implements RecognitionHooks {
483483
maxEndpointingDelay:
484484
this.agent.turnHandling?.endpointing?.maxDelay ??
485485
this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay,
486-
rootSpanContext: this.agentSession.rootSpanContext,
486+
agentSession: this.agentSession,
487487
sttModel: this.stt?.label,
488488
sttProvider: this.getSttProvider(),
489489
getLinkedParticipant: () => this.agentSession._roomIO?.linkedParticipant,

agents/src/voice/audio_recognition.ts

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
3434
import { traceTypes, tracer } from '../telemetry/index.js';
3535
import { Task, cancelAndWait, delay, readStream, waitForAbort } from '../utils.js';
3636
import { type VAD, type VADEvent, VADEventType } from '../vad.js';
37-
import type { TurnDetectionMode } from './agent_session.js';
37+
import type { AgentSession, TurnDetectionMode } from './agent_session.js';
3838
import type { STTNode } from './io.js';
3939
import { setParticipantSpanAttributes } from './utils.js';
4040

@@ -142,8 +142,8 @@ export interface AudioRecognitionOptions {
142142
minEndpointingDelay: number;
143143
/** Maximum endpointing delay in milliseconds. */
144144
maxEndpointingDelay: number;
145-
/** Root span context for tracing. */
146-
rootSpanContext?: Context;
145+
/** Live reference to AgentSession — used to read rootSpanContext fresh on each span creation. */
146+
agentSession?: AgentSession;
147147
/** STT model name for tracing */
148148
sttModel?: string;
149149
/** STT provider name for tracing */
@@ -173,7 +173,7 @@ export class AudioRecognition {
173173
private minEndpointingDelay: number;
174174
private maxEndpointingDelay: number;
175175
private lastLanguage?: LanguageCode;
176-
private rootSpanContext?: Context;
176+
private agentSession?: AgentSession;
177177
private sttModel?: string;
178178
private sttProvider?: string;
179179
private getLinkedParticipant?: () => ParticipantLike | undefined;
@@ -227,7 +227,7 @@ export class AudioRecognition {
227227
this.minEndpointingDelay = opts.minEndpointingDelay;
228228
this.maxEndpointingDelay = opts.maxEndpointingDelay;
229229
this.lastLanguage = undefined;
230-
this.rootSpanContext = opts.rootSpanContext;
230+
this.agentSession = opts.agentSession;
231231
this.sttModel = opts.sttModel;
232232
this.sttProvider = opts.sttProvider;
233233
this.getLinkedParticipant = opts.getLinkedParticipant;
@@ -510,7 +510,7 @@ export class AudioRecognition {
510510

511511
this.userTurnSpan = tracer.startSpan({
512512
name: 'user_turn',
513-
context: this.rootSpanContext,
513+
context: this.agentSession?.rootSpanContext,
514514
startTime,
515515
});
516516

@@ -530,7 +530,7 @@ export class AudioRecognition {
530530
}
531531

532532
private userTurnContext(span: Span): Context {
533-
const base = this.rootSpanContext ?? ROOT_CONTEXT;
533+
const base = this.agentSession?.rootSpanContext ?? ROOT_CONTEXT;
534534
return trace.setSpan(base, span);
535535
}
536536

0 commit comments

Comments
 (0)