diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 8556080bd667..5568bdb1d2d1 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -1473,6 +1473,8 @@ "FILLER", "foundry", "FOUNDRY", + "genai", + "GENAI", "Unpooled", "viseme", "VISEME", diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md index 5969b2be656e..beac01f1e3a5 100644 --- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md +++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md @@ -4,6 +4,17 @@ ### Features Added +- Added built-in OpenTelemetry tracing support for voice sessions following GenAI Semantic Conventions: + - `VoiceLiveClientBuilder.openTelemetry(OpenTelemetry)` method for providing a custom OpenTelemetry instance + - Defaults to `GlobalOpenTelemetry.getOrNoop()` for automatic Java agent detection with zero-cost no-op fallback + - Emits spans for `connect`, `send`, `recv`, and `close` operations with Python-aligned VoiceLive telemetry semantics + - Session-level counters: turn count, interruption count, audio bytes sent/received, first token latency, MCP call/list-tools counts + - Tracks response and item hierarchy IDs (`response_id`, `conversation_id`, `item_id`, `call_id`, `previous_item_id`, `output_index`) on send/recv spans + - Captures agent/session config attributes on connect spans (`gen_ai.agent.*`, `gen_ai.system_instructions`, `gen_ai.request.*`) + - Adds OpenTelemetry metrics (`gen_ai.client.operation.duration`, `gen_ai.client.token.usage`) with provider/server/model dimensions + - Content recording controlled via `enableContentRecording(boolean)` or `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` (with legacy `AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED` fallback) +- Added `TelemetrySample.java` demonstrating OpenTelemetry integration patterns + ### Breaking Changes ### Bugs Fixed diff --git a/sdk/voicelive/azure-ai-voicelive/README.md b/sdk/voicelive/azure-ai-voicelive/README.md index 
a57e3e5cb387..5afc3663a015 100644 --- a/sdk/voicelive/azure-ai-voicelive/README.md +++ b/sdk/voicelive/azure-ai-voicelive/README.md @@ -126,6 +126,9 @@ The following sections provide code snippets for common scenarios: * [Handle event types](#handle-event-types) * [Voice configuration](#voice-configuration) * [Function calling](#function-calling) +* [MCP tool integration](#mcp-tool-integration) +* [Azure AI Foundry agent session](#azure-ai-foundry-agent-session) +* [Telemetry and tracing](#telemetry-and-tracing) * [Complete voice assistant with microphone](#complete-voice-assistant-with-microphone) ### Focused Sample Files @@ -166,6 +169,29 @@ For easier learning, explore these focused samples in order: - Execute functions locally and return results - Continue conversation with function results +7. **telemetry/ExplicitTracingSample.java** - Explicit OpenTelemetry tracing + - Explicit OpenTelemetry instance via builder + - Content recording with `--enable-recording` flag + - Custom console span exporter + - Azure Monitor integration example + +8. **telemetry/GlobalTracingSample.java** - Automatic tracing via GlobalOpenTelemetry + - Zero builder configuration — uses `buildAndRegisterGlobal()` + - Same span output as explicit tracing + +9. **MCPSample.java** - Model Context Protocol (MCP) tool integration + - Configure MCP servers for external tool access + - Handle MCP call events and tool execution + - Handle MCP approval requests for tool calls + - Process MCP call results and continue conversations + +10. 
**AgentV2Sample.java** - Azure AI Foundry agent session + - Connect directly to an Azure AI Foundry agent via AgentSessionConfig + - Real-time audio capture and playback + - Sequence number based audio for interrupt handling + - Azure noise suppression and echo cancellation + - Conversation logging to file + > **Note:** To run audio samples (AudioPlaybackSample, MicrophoneInputSample, VoiceAssistantSample, FunctionCallingSample): > ```bash > mvn exec:java -Dexec.mainClass=com.azure.ai.voicelive.FunctionCallingSample -Dexec.classpathScope=test @@ -397,6 +423,171 @@ client.startSession("gpt-4o-realtime-preview") * Results are sent back to continue the conversation * See `FunctionCallingSample.java` for a complete working example +### MCP tool integration + +Use [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) servers to give the AI access to external tools during a voice session. The service calls the MCP server directly — your code only needs to handle approval requests when required: + +```java com.azure.ai.voicelive.mcp +// Configure MCP servers as tools +MCPServer mcpServer = new MCPServer("deepwiki", "https://mcp.deepwiki.com/mcp") + .setRequireApproval(BinaryData.fromObject(MCPApprovalType.ALWAYS)); + +VoiceLiveSessionOptions options = new VoiceLiveSessionOptions() + .setTools(Arrays.asList(mcpServer)) + .setInstructions("You have access to external tools via MCP. 
Use them when asked."); + +// Handle MCP approval requests in your event loop +session.receiveEvents().subscribe(event -> { + if (event instanceof SessionUpdateResponseOutputItemDone) { + SessionUpdateResponseOutputItemDone itemDone = (SessionUpdateResponseOutputItemDone) event; + SessionResponseItem item = itemDone.getItem(); + + if (item instanceof ResponseMCPApprovalRequestItem) { + // Approve the tool call + ResponseMCPApprovalRequestItem approvalRequest = (ResponseMCPApprovalRequestItem) item; + MCPApprovalResponseRequestItem approval = new MCPApprovalResponseRequestItem( + approvalRequest.getId(), true); + ClientEventConversationItemCreate createItem = new ClientEventConversationItemCreate() + .setItem(approval); + session.sendEvent(createItem).subscribe(); + session.sendEvent(new ClientEventResponseCreate()).subscribe(); + } + } +}); +``` + +> See `MCPSample.java` for a complete working example with MCP server configuration. + +### Azure AI Foundry agent session + +Connect directly to an Azure AI Foundry agent using `AgentSessionConfig`. The agent becomes the primary responder for the voice session: + +```java com.azure.ai.voicelive.agentsession +// Configure agent connection +AgentSessionConfig agentConfig = new AgentSessionConfig("my-agent", "my-project") + .setAgentVersion("1.0"); + +// Start session with agent config (uses DefaultAzureCredential) +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new DefaultAzureCredentialBuilder().build()) + .buildAsyncClient(); + +client.startSession(agentConfig) + .flatMap(session -> { + session.receiveEvents().subscribe(event -> handleEvent(event)); + return Mono.just(session); + }) + .block(); +``` + +> See `AgentV2Sample.java` for a full implementation with audio capture, playback, and conversation logging. + +### Telemetry and tracing + +The SDK has built-in [OpenTelemetry](https://opentelemetry.io/) tracing that emits spans for every WebSocket operation. 
When no OpenTelemetry SDK is present, all tracing calls are automatically no-op with zero performance impact. + +#### Automatic tracing (recommended) + +When no `.openTelemetry()` is called on the builder, the SDK defaults to `GlobalOpenTelemetry.getOrNoop()` — +tracing is automatically active when a global OpenTelemetry instance exists (e.g., via the +[OpenTelemetry Java agent](https://opentelemetry.io/docs/languages/java/automatic/) or +`OpenTelemetrySdk.builder().buildAndRegisterGlobal()`), and is a zero-cost no-op otherwise: + +```java com.azure.ai.voicelive.tracing.automatic +// No special configuration needed — tracing is picked up from GlobalOpenTelemetry +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildAsyncClient(); +``` + +#### Explicit OpenTelemetry instance + +Pass your own `OpenTelemetry` instance directly to the builder for full control. This is useful +when you want different clients to use different tracer configurations: + +```java com.azure.ai.voicelive.tracing.explicit +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .buildAsyncClient(); +``` + +#### Span structure + +When tracing is active, the following span hierarchy is emitted for each voice session: + +``` +connect gpt-4o-realtime-preview ← session lifetime span +├── send session.update ← one span per sent event +├── send input_audio_buffer.append +├── send response.create +├── recv session.created ← one span per received event +├── recv session.updated +├── recv response.audio.delta +├── recv response.done ← includes token usage attributes +├── recv rate_limits.updated ← rate limit info +└── close +``` + +**Common attributes** (on all spans): `gen_ai.system`, `gen_ai.operation.name`, `gen_ai.provider.name`, `gen_ai.request.model`, `az.namespace`, `server.address`, `server.port` + +**Session-level 
attributes** (on the connect span, flushed at session close): +- `gen_ai.voice.session_id` — Voice session ID +- `gen_ai.voice.input_audio_format` / `gen_ai.voice.output_audio_format` — Audio formats (e.g., `pcm16`) +- `gen_ai.voice.input_sample_rate` — Input audio sampling rate (Hz) +- `gen_ai.voice.turn_count` — Completed response turns +- `gen_ai.voice.interruption_count` — User interruptions +- `gen_ai.voice.audio_bytes_sent` / `gen_ai.voice.audio_bytes_received` — Audio payload bytes +- `gen_ai.voice.first_token_latency_ms` — Time to first audio response +- `gen_ai.conversation.id` — Conversation ID +- `gen_ai.response.id` / `gen_ai.response.finish_reasons` — Last response metadata +- `gen_ai.system_instructions` / `gen_ai.request.temperature` / `gen_ai.request.max_output_tokens` / `gen_ai.request.tools` — Session config from `session.update` +- `gen_ai.agent.name` / `gen_ai.agent.id` / `gen_ai.agent.version` / `gen_ai.agent.project_name` / `gen_ai.agent.thread_id` — Agent metadata (when using `AgentSessionConfig`) + +#### Content recording + +By default, message payloads are not recorded in spans for privacy. Enable content recording via the builder or environment variable: + +```java com.azure.ai.voicelive.tracing.contentrecording +// Enable content recording to capture full JSON payloads in span events +VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .enableContentRecording(true) + .buildAsyncClient(); + +// Or via environment variables (no code changes needed): +// OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +// (legacy fallback) AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED=true +``` + +> See `telemetry/ExplicitTracingSample.java` and `telemetry/GlobalTracingSample.java` for complete tracing examples. 
+> +> **Run the telemetry samples** to see tracing in action: +> ```bash +> # Explicit tracing (prints span names and attributes): +> mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.ExplicitTracingSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing" +> +> # Explicit tracing + content recording (also prints full JSON payloads): +> mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.ExplicitTracingSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing --enable-recording" +> +> # Automatic tracing via GlobalOpenTelemetry: +> mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.GlobalTracingSample" -Dexec.classpathScope=test +> ``` +> +> Sample output with `--enable-tracing`: +> ``` +> 'send session.update' : {gen_ai.operation.name=send, gen_ai.voice.event_type=session.update, ...} +> 'recv session.created' : {gen_ai.operation.name=recv, gen_ai.voice.event_type=session.created, ...} +> 'recv response.done' : {gen_ai.usage.input_tokens=100, gen_ai.usage.output_tokens=50, ...} +> 'close' : {gen_ai.operation.name=close, ...} +> 'connect gpt-4o-realtime-preview' : {gen_ai.voice.session_id=..., gen_ai.voice.turn_count=1, ...} +> ``` + ### Complete voice assistant with microphone A full example demonstrating real-time microphone input and audio playback: @@ -442,7 +633,7 @@ client.startSession("gpt-4o-realtime-preview") // Subscribe to receive server events session.receiveEvents() .subscribe( - event -> handleEvent(event, session), + event -> handleEvent(event), error -> System.err.println("Error: " + error.getMessage()) ); diff --git a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml index 6397e9e904dd..b70ac087c749 100644 --- a/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml +++ b/sdk/voicelive/azure-ai-voicelive/checkstyle-suppressions.xml @@ -3,6 +3,20 @@ + + + + + + + + + + + + + + diff --git 
a/sdk/voicelive/azure-ai-voicelive/pom.xml b/sdk/voicelive/azure-ai-voicelive/pom.xml index e2ca1f28fc94..d7e864d811f2 100644 --- a/sdk/voicelive/azure-ai-voicelive/pom.xml +++ b/sdk/voicelive/azure-ai-voicelive/pom.xml @@ -56,6 +56,11 @@ Code generated by Microsoft (R) TypeSpec Code Generator. azure-core-http-netty 1.16.3 + + io.opentelemetry + opentelemetry-api + 1.58.0 + com.azure azure-core-test @@ -82,5 +87,42 @@ Code generated by Microsoft (R) TypeSpec Code Generator. 3.7.17 test + + io.opentelemetry + opentelemetry-sdk + 1.58.0 + test + + + io.opentelemetry + opentelemetry-sdk-testing + 1.58.0 + test + + + io.opentelemetry + opentelemetry-exporter-logging + 1.58.0 + test + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.6.1 + + + + + io.opentelemetry:opentelemetry-api:[1.58.0] + + + + + + + diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java index 000fcf98a5cd..c1736194b808 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveAsyncClient.java @@ -18,6 +18,8 @@ import com.azure.core.http.HttpHeaderName; import com.azure.core.http.HttpHeaders; import com.azure.core.util.logging.ClientLogger; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.api.trace.Tracer; import reactor.core.publisher.Mono; @@ -34,6 +36,9 @@ public final class VoiceLiveAsyncClient { private final TokenCredential tokenCredential; private final String apiVersion; private final HttpHeaders additionalHeaders; + private final Tracer tracer; + private final Meter meter; + private final Boolean enableContentRecording; /** * Creates a VoiceLiveAsyncClient with API key authentication. 
@@ -44,11 +49,34 @@ public final class VoiceLiveAsyncClient { * @param additionalHeaders Additional headers to include in requests. */ VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders) { + this(endpoint, keyCredential, apiVersion, additionalHeaders, null, null); + } + + /** + * Creates a VoiceLiveAsyncClient with API key authentication and tracing. + * + * @param endpoint The service endpoint. + * @param keyCredential The API key credential. + * @param apiVersion The API version. + * @param additionalHeaders Additional headers to include in requests. + * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer). + * @param enableContentRecording Override for content recording, or null to use env var. + */ + VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders, + Tracer tracer, Boolean enableContentRecording) { + this(endpoint, keyCredential, apiVersion, additionalHeaders, tracer, null, enableContentRecording); + } + + VoiceLiveAsyncClient(URI endpoint, KeyCredential keyCredential, String apiVersion, HttpHeaders additionalHeaders, + Tracer tracer, Meter meter, Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null"); this.tokenCredential = null; this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null"); this.additionalHeaders = additionalHeaders != null ? 
additionalHeaders : new HttpHeaders(); + this.tracer = tracer; + this.meter = meter; + this.enableContentRecording = enableContentRecording; } /** @@ -61,11 +89,34 @@ public final class VoiceLiveAsyncClient { */ VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion, HttpHeaders additionalHeaders) { + this(endpoint, tokenCredential, apiVersion, additionalHeaders, null, null); + } + + /** + * Creates a VoiceLiveAsyncClient with token authentication and tracing. + * + * @param endpoint The service endpoint. + * @param tokenCredential The token credential. + * @param apiVersion The API version. + * @param additionalHeaders Additional headers to include in requests. + * @param tracer The OpenTelemetry Tracer for instrumentation (may be a no-op tracer). + * @param enableContentRecording Override for content recording, or null to use env var. + */ + VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion, + HttpHeaders additionalHeaders, Tracer tracer, Boolean enableContentRecording) { + this(endpoint, tokenCredential, apiVersion, additionalHeaders, tracer, null, enableContentRecording); + } + + VoiceLiveAsyncClient(URI endpoint, TokenCredential tokenCredential, String apiVersion, + HttpHeaders additionalHeaders, Tracer tracer, Meter meter, Boolean enableContentRecording) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = null; this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null"); this.apiVersion = Objects.requireNonNull(apiVersion, "'apiVersion' cannot be null"); this.additionalHeaders = additionalHeaders != null ? 
additionalHeaders : new HttpHeaders(); + this.tracer = tracer; + this.meter = meter; + this.enableContentRecording = enableContentRecording; } /** @@ -79,12 +130,7 @@ public Mono startSession(String model) { Objects.requireNonNull(model, "'model' cannot be null"); return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, model)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model, null); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -97,12 +143,7 @@ public Mono startSession(String model) { */ public Mono startSession() { return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null, null); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -122,12 +163,7 @@ public Mono startSession(String model, VoiceLiveReq return Mono .fromCallable(() -> convertToWebSocketEndpoint(endpoint, model, requestOptions.getCustomQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, model, null); // Merge additional headers with custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, 
requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); @@ -148,12 +184,7 @@ public Mono startSession(VoiceLiveRequestOptions re return Mono .fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, requestOptions.getCustomQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null, null); // Merge additional headers with custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); @@ -176,12 +207,7 @@ public Mono startSession(AgentSessionConfig agentCo return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, agentConfig.toQueryParameters())) .flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null, agentConfig); return session.connect(additionalHeaders).thenReturn(session); }); } @@ -211,18 +237,31 @@ public Mono startSession(AgentSessionConfig agentCo } return Mono.fromCallable(() -> convertToWebSocketEndpoint(endpoint, null, mergedParams)).flatMap(wsEndpoint -> { - VoiceLiveSessionAsyncClient session; - if (keyCredential != null) { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential); - } else { - session = new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential); - } + VoiceLiveSessionAsyncClient session = createSessionClient(wsEndpoint, null, agentConfig); // Merge additional headers with 
custom headers from requestOptions HttpHeaders mergedHeaders = mergeHeaders(additionalHeaders, requestOptions.getCustomHeaders()); return session.connect(mergedHeaders).thenReturn(session); }); } + /** + * Creates a VoiceLiveSessionAsyncClient with the appropriate credentials and optional tracing. + * + * @param wsEndpoint The WebSocket endpoint URI. + * @param model The model name, used for tracing span names. + * @return A new VoiceLiveSessionAsyncClient instance. + */ + private VoiceLiveSessionAsyncClient createSessionClient(URI wsEndpoint, String model, + AgentSessionConfig agentSessionConfig) { + if (keyCredential != null) { + return new VoiceLiveSessionAsyncClient(wsEndpoint, keyCredential, tracer, meter, model, + enableContentRecording, agentSessionConfig); + } else { + return new VoiceLiveSessionAsyncClient(wsEndpoint, tokenCredential, tracer, meter, model, + enableContentRecording, agentSessionConfig); + } + } + /** * Merges two HttpHeaders objects, with custom headers taking precedence. * diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java index e655bde45d91..1867c0d2abaf 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveClientBuilder.java @@ -17,6 +17,12 @@ import com.azure.core.util.ClientOptions; import com.azure.core.util.CoreUtils; import com.azure.core.util.logging.ClientLogger; +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.trace.Tracer; + +import java.util.Map; /** * Builder for creating instances of {@link VoiceLiveAsyncClient}. 
@@ -25,12 +31,17 @@ public final class VoiceLiveClientBuilder implements TokenCredentialTrait, KeyCredentialTrait, EndpointTrait { private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveClientBuilder.class); + private static final Map PROPERTIES = CoreUtils.getProperties("azure-ai-voicelive.properties"); + private static final String SDK_NAME = "azure-ai-voicelive"; + private static final String SDK_VERSION = PROPERTIES.getOrDefault("version", "unknown"); private URI endpoint; private KeyCredential keyCredential; private TokenCredential tokenCredential; private VoiceLiveServiceVersion serviceVersion; private ClientOptions clientOptions; + private OpenTelemetry openTelemetry; + private Boolean enableContentRecording; /** * Creates a new instance of VoiceLiveClientBuilder. @@ -107,6 +118,42 @@ public VoiceLiveClientBuilder clientOptions(ClientOptions clientOptions) { return this; } + /** + * Sets the {@link OpenTelemetry} instance to use for tracing. + * + *

If not set, defaults to {@link GlobalOpenTelemetry#getOrNoop()}, which automatically + * uses the OpenTelemetry instance installed by the Java agent (if present), or a no-op + * implementation that has zero performance impact.

+ * + *

When an OpenTelemetry SDK is configured (either globally or via this method), the SDK + * automatically emits spans for connect, send, recv, and close operations with voice-specific + * attributes and session-level counters following GenAI Semantic Conventions.

+ * + * @param openTelemetry The OpenTelemetry instance. + * @return The updated VoiceLiveClientBuilder instance. + * @throws NullPointerException if {@code openTelemetry} is null. + */ + public VoiceLiveClientBuilder openTelemetry(OpenTelemetry openTelemetry) { + this.openTelemetry = Objects.requireNonNull(openTelemetry, "'openTelemetry' cannot be null"); + return this; + } + + /** + * Enables or disables content recording in trace spans. + * + *

When enabled, full JSON payloads (including audio data) will be captured in span events. + * This is off by default for privacy. Can also be controlled via the + * {@code OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT} environment variable. + * The legacy {@code AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED} variable is still supported.

+ * + * @param enableContentRecording true to enable content recording in spans. + * @return The updated VoiceLiveClientBuilder instance. + */ + public VoiceLiveClientBuilder enableContentRecording(boolean enableContentRecording) { + this.enableContentRecording = enableContentRecording; + return this; + } + /** * Builds a {@link VoiceLiveAsyncClient} instance with the configured options. * @@ -125,10 +172,16 @@ public VoiceLiveAsyncClient buildAsyncClient() { VoiceLiveServiceVersion version = serviceVersion != null ? serviceVersion : VoiceLiveServiceVersion.getLatest(); HttpHeaders additionalHeaders = CoreUtils.createHttpHeadersFromClientOptions(clientOptions); + OpenTelemetry otel = openTelemetry != null ? openTelemetry : GlobalOpenTelemetry.getOrNoop(); + Tracer tracer = otel.getTracer(SDK_NAME); + Meter meter = otel.getMeter(SDK_NAME); + if (keyCredential != null) { - return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders); + return new VoiceLiveAsyncClient(endpoint, keyCredential, version.getVersion(), additionalHeaders, tracer, + meter, enableContentRecording); } else { - return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders); + return new VoiceLiveAsyncClient(endpoint, tokenCredential, version.getVersion(), additionalHeaders, tracer, + meter, enableContentRecording); } } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java index d1bc2f098e03..66f42a01d89b 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveSessionAsyncClient.java @@ -3,6 +3,15 @@ package com.azure.ai.voicelive; +import java.io.IOException; +import java.net.URI; +import java.util.Base64; +import 
java.util.Locale; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +import com.azure.ai.voicelive.models.AgentSessionConfig; import com.azure.ai.voicelive.models.ClientEvent; import com.azure.ai.voicelive.models.ClientEventConversationItemCreate; import com.azure.ai.voicelive.models.ClientEventConversationItemDelete; @@ -27,18 +36,21 @@ import com.azure.core.credential.TokenCredential; import com.azure.core.credential.TokenRequestContext; import com.azure.core.http.HttpHeader; -import com.azure.core.http.HttpHeaders; import com.azure.core.http.HttpHeaderName; +import com.azure.core.http.HttpHeaders; import com.azure.core.util.AsyncCloseable; import com.azure.core.util.BinaryData; import com.azure.core.util.logging.ClientLogger; import com.azure.core.util.serializer.JacksonAdapter; import com.azure.core.util.serializer.SerializerAdapter; import com.azure.core.util.serializer.SerializerEncoding; + import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.websocketx.TextWebSocketFrame; import io.netty.handler.codec.http.websocketx.WebSocketFrame; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.api.trace.Tracer; import reactor.core.Disposable; import reactor.core.publisher.BufferOverflowStrategy; import reactor.core.publisher.Flux; @@ -49,14 +61,6 @@ import reactor.netty.http.websocket.WebsocketInbound; import reactor.netty.http.websocket.WebsocketOutbound; -import java.io.IOException; -import java.net.URI; -import java.util.Base64; -import java.util.Locale; -import java.util.Objects; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; - /** * Represents a WebSocket-based session for real-time voice communication with the Azure VoiceLive service. *

@@ -95,6 +99,8 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl private final KeyCredential keyCredential; private final TokenCredential tokenCredential; private final SerializerAdapter serializer; + private final VoiceLiveTracer voiceLiveTracer; + private final AgentSessionConfig agentSessionConfig; private final AtomicBoolean isConnected = new AtomicBoolean(false); private final AtomicBoolean isClosed = new AtomicBoolean(false); @@ -104,6 +110,9 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl private final Sinks.Many receiveSink = Sinks.many().multicast().onBackpressureBuffer(); private final Sinks.Many sendSink = Sinks.many().multicast().onBackpressureBuffer(); + // Cached shared events flux — ensures tracing fires only once per event regardless of subscriber count + private volatile Flux sharedEventsFlux; + // WebSocket connection state management private final AtomicReference inboundRef = new AtomicReference<>(); private final AtomicReference outboundRef = new AtomicReference<>(); @@ -122,10 +131,32 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl * @param keyCredential The API key credential. */ VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential) { + this(endpoint, keyCredential, null, null, null); + } + + /** + * Creates a new VoiceLiveSessionAsyncClient with API key authentication and tracing. + * + * @param endpoint The WebSocket endpoint. + * @param keyCredential The API key credential. + * @param tracer The OpenTelemetry Tracer (may be a no-op tracer). + * @param model The model name for span naming. + * @param enableContentRecording Override for content recording, or null to use env var. 
+ */ + VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential, Tracer tracer, String model, + Boolean enableContentRecording) { + this(endpoint, keyCredential, tracer, null, model, enableContentRecording, null); + } + + VoiceLiveSessionAsyncClient(URI endpoint, KeyCredential keyCredential, Tracer tracer, Meter meter, String model, + Boolean enableContentRecording, AgentSessionConfig agentSessionConfig) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = Objects.requireNonNull(keyCredential, "'keyCredential' cannot be null"); this.tokenCredential = null; this.serializer = JacksonAdapter.createDefaultSerializerAdapter(); + this.voiceLiveTracer + = tracer != null ? new VoiceLiveTracer(tracer, meter, endpoint, model, enableContentRecording) : null; + this.agentSessionConfig = agentSessionConfig; } /** @@ -135,10 +166,32 @@ public final class VoiceLiveSessionAsyncClient implements AsyncCloseable, AutoCl * @param tokenCredential The token credential. */ VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential) { + this(endpoint, tokenCredential, null, null, null); + } + + /** + * Creates a new VoiceLiveSessionAsyncClient with token authentication and tracing. + * + * @param endpoint The WebSocket endpoint. + * @param tokenCredential The token credential. + * @param tracer The OpenTelemetry Tracer (may be a no-op tracer). + * @param model The model name for span naming. + * @param enableContentRecording Override for content recording, or null to use env var. 
+ */ + VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential, Tracer tracer, String model, + Boolean enableContentRecording) { + this(endpoint, tokenCredential, tracer, null, model, enableContentRecording, null); + } + + VoiceLiveSessionAsyncClient(URI endpoint, TokenCredential tokenCredential, Tracer tracer, Meter meter, String model, + Boolean enableContentRecording, AgentSessionConfig agentSessionConfig) { this.endpoint = Objects.requireNonNull(endpoint, "'endpoint' cannot be null"); this.keyCredential = null; this.tokenCredential = Objects.requireNonNull(tokenCredential, "'tokenCredential' cannot be null"); this.serializer = JacksonAdapter.createDefaultSerializerAdapter(); + this.voiceLiveTracer + = tracer != null ? new VoiceLiveTracer(tracer, meter, endpoint, model, enableContentRecording) : null; + this.agentSessionConfig = agentSessionConfig; } /** @@ -160,6 +213,11 @@ Mono connect(HttpHeaders additionalHeaders) { return Mono.error(new IllegalStateException("Session lifecycle already active")); } + // Start the connect span (session lifetime) + if (voiceLiveTracer != null) { + voiceLiveTracer.startConnectSpan(agentSessionConfig); + } + Sinks.One readySink = Sinks.one(); Sinks.One closeSignal = Sinks.one(); connectionCloseSignalRef.set(closeSignal); @@ -263,6 +321,11 @@ Mono connect(HttpHeaders additionalHeaders) { readySink.tryEmitError(error); connectionCloseSignalRef.compareAndSet(closeSignal, null); disposeLifecycleSubscription(); + + // End the connect span on error + if (voiceLiveTracer != null) { + voiceLiveTracer.endConnectSpan(error); + } }, () -> { LOGGER.info("WebSocket handler completed"); connectionCloseSignalRef.compareAndSet(closeSignal, null); @@ -281,6 +344,13 @@ Mono connect(HttpHeaders additionalHeaders) { public Mono closeAsync() { if (isClosed.compareAndSet(false, true)) { LOGGER.info("Closing VoiceLive session"); + + // Trace the close operation and end the connect span + if (voiceLiveTracer != null) { + 
voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + } + sendSink.tryEmitComplete(); Sinks.One closeSignal = connectionCloseSignalRef.getAndSet(null); @@ -351,6 +421,12 @@ public Mono sendEvent(ClientEvent event) { return Mono.fromCallable(() -> { try { String json = serializer.serialize(event, SerializerEncoding.JSON); + + // Trace the send operation + if (voiceLiveTracer != null) { + voiceLiveTracer.traceSend(event, json); + } + return BinaryData.fromString(json); } catch (IOException e) { throw LOGGER.logExceptionAsError(new RuntimeException("Failed to serialize event", e)); @@ -397,16 +473,39 @@ public Mono send(BinaryData data) { */ public Flux receiveEvents() { throwIfNotConnected(); - return receive() - .onBackpressureBuffer(INBOUND_BUFFER_CAPACITY, - dropped -> LOGGER.error("Inbound buffer overflow; dropped {} bytes", dropped.toBytes().length), - BufferOverflowStrategy.ERROR) - .flatMap(this::parseToSessionUpdate) - .doOnError(error -> LOGGER.error("Failed to parse session update", error)) - .onErrorResume(error -> { - LOGGER.warning("Skipping unrecognized server event: {}", error.getMessage()); - return Flux.empty(); - }); + Flux result = sharedEventsFlux; + if (result == null) { + synchronized (this) { + result = sharedEventsFlux; + if (result == null) { + result + = receive() + .onBackpressureBuffer(INBOUND_BUFFER_CAPACITY, + dropped -> LOGGER.error("Inbound buffer overflow; dropped {} bytes", + dropped.toBytes().length), + BufferOverflowStrategy.ERROR) + .flatMap(data -> { + String rawPayload = data.toString(); + return parseToSessionUpdate(data).doOnNext(update -> { + if (voiceLiveTracer != null) { + voiceLiveTracer.traceRecv(update, rawPayload); + } + }).onErrorResume(error -> { + if (voiceLiveTracer != null) { + voiceLiveTracer.traceRecvRaw(rawPayload); + } + LOGGER.warning("Skipping unrecognized server event: {}", error.getMessage()); + return Mono.empty(); + }); + }) + .doOnError(error -> LOGGER.error("Failed to parse session 
update", error)) + .publish() + .autoConnect(); + sharedEventsFlux = result; + } + } + } + return result; } /** diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java new file mode 100644 index 000000000000..49aca30d5a94 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/VoiceLiveTracer.java @@ -0,0 +1,1060 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.voicelive; + +import java.net.URI; +import java.util.Base64; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import com.azure.ai.voicelive.models.ClientEvent; +import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend; +import com.azure.ai.voicelive.models.ClientEventResponseCancel; +import com.azure.ai.voicelive.models.ClientEventResponseCreate; +import com.azure.ai.voicelive.models.ClientEventSessionUpdate; +import com.azure.ai.voicelive.models.ClientEventType; +import com.azure.ai.voicelive.models.ResponseTokenStatistics; +import com.azure.ai.voicelive.models.SessionResponse; +import com.azure.ai.voicelive.models.SessionUpdate; +import com.azure.ai.voicelive.models.SessionUpdateConversationItemCreated; +import com.azure.ai.voicelive.models.SessionUpdateError; +import com.azure.ai.voicelive.models.SessionUpdateErrorDetails; +import com.azure.ai.voicelive.models.SessionUpdateResponseAudioDelta; +import com.azure.ai.voicelive.models.SessionUpdateResponseAudioTranscriptDelta; +import com.azure.ai.voicelive.models.SessionUpdateResponseCreated; +import com.azure.ai.voicelive.models.SessionUpdateResponseDone; +import com.azure.ai.voicelive.models.SessionUpdateResponseTextDelta; +import com.azure.ai.voicelive.models.SessionUpdateResponseFunctionCallArgumentsDelta; +import 
com.azure.ai.voicelive.models.SessionUpdateResponseFunctionCallArgumentsDone; +import com.azure.ai.voicelive.models.SessionUpdateResponseOutputItemAdded; +import com.azure.ai.voicelive.models.SessionUpdateResponseOutputItemDone; +import com.azure.ai.voicelive.models.SessionUpdateSessionCreated; +import com.azure.ai.voicelive.models.SessionUpdateSessionUpdated; +import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; +import com.azure.ai.voicelive.models.VoiceLiveSessionResponse; +import com.azure.core.util.Configuration; +import com.azure.core.util.ConfigurationProperty; +import com.azure.core.util.ConfigurationPropertyBuilder; +import com.azure.core.util.logging.ClientLogger; + +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.common.AttributesBuilder; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanBuilder; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; + +/** + * Tracer for VoiceLive WebSocket sessions using the OpenTelemetry API. + *

+ * Manages a parent "connect" span for the session lifetime, with child spans for + * send, recv, and close operations. Tracks session-level counters for audio bytes, + * turn counts, interruptions, and first-token latency. + *

+ */ +final class VoiceLiveTracer { + + private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveTracer.class); + + // GenAI semantic convention attribute keys + static final AttributeKey GEN_AI_SYSTEM = AttributeKey.stringKey("gen_ai.system"); + static final String GEN_AI_SYSTEM_VALUE = "az.ai.voicelive"; + static final AttributeKey GEN_AI_OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name"); + static final AttributeKey GEN_AI_REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model"); + static final AttributeKey GEN_AI_PROVIDER_NAME = AttributeKey.stringKey("gen_ai.provider.name"); + static final String GEN_AI_PROVIDER_NAME_VALUE = "microsoft.foundry"; + static final AttributeKey AZ_NAMESPACE = AttributeKey.stringKey("az.namespace"); + static final String AZ_NAMESPACE_VALUE = "Microsoft.CognitiveServices"; + static final AttributeKey SERVER_ADDRESS = AttributeKey.stringKey("server.address"); + static final AttributeKey SERVER_PORT = AttributeKey.longKey("server.port"); + + // Voice-specific attribute keys + static final AttributeKey GEN_AI_VOICE_SESSION_ID = AttributeKey.stringKey("gen_ai.voice.session_id"); + static final AttributeKey GEN_AI_VOICE_INPUT_AUDIO_FORMAT + = AttributeKey.stringKey("gen_ai.voice.input_audio_format"); + static final AttributeKey GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT + = AttributeKey.stringKey("gen_ai.voice.output_audio_format"); + static final AttributeKey GEN_AI_VOICE_TURN_COUNT = AttributeKey.longKey("gen_ai.voice.turn_count"); + static final AttributeKey GEN_AI_VOICE_INTERRUPTION_COUNT + = AttributeKey.longKey("gen_ai.voice.interruption_count"); + static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_SENT + = AttributeKey.longKey("gen_ai.voice.audio_bytes_sent"); + static final AttributeKey GEN_AI_VOICE_AUDIO_BYTES_RECEIVED + = AttributeKey.longKey("gen_ai.voice.audio_bytes_received"); + static final AttributeKey GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS + = 
AttributeKey.doubleKey("gen_ai.voice.first_token_latency_ms"); + static final AttributeKey GEN_AI_VOICE_EVENT_TYPE = AttributeKey.stringKey("gen_ai.voice.event_type"); + static final AttributeKey GEN_AI_VOICE_MESSAGE_SIZE = AttributeKey.longKey("gen_ai.voice.message_size"); + static final AttributeKey GEN_AI_USAGE_INPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.input_tokens"); + static final AttributeKey GEN_AI_USAGE_OUTPUT_TOKENS = AttributeKey.longKey("gen_ai.usage.output_tokens"); + static final AttributeKey ERROR_TYPE = AttributeKey.stringKey("error.type"); + + // Function call attribute keys + static final AttributeKey GEN_AI_VOICE_CALL_ID = AttributeKey.stringKey("gen_ai.voice.call_id"); + static final AttributeKey GEN_AI_VOICE_PREVIOUS_ITEM_ID + = AttributeKey.stringKey("gen_ai.voice.previous_item_id"); + static final AttributeKey GEN_AI_VOICE_ITEM_ID = AttributeKey.stringKey("gen_ai.voice.item_id"); + static final AttributeKey GEN_AI_VOICE_OUTPUT_INDEX = AttributeKey.longKey("gen_ai.voice.output_index"); + + // Agent attribute keys + static final AttributeKey GEN_AI_AGENT_NAME = AttributeKey.stringKey("gen_ai.agent.name"); + static final AttributeKey GEN_AI_AGENT_VERSION = AttributeKey.stringKey("gen_ai.agent.version"); + static final AttributeKey GEN_AI_AGENT_PROJECT_NAME = AttributeKey.stringKey("gen_ai.agent.project_name"); + static final AttributeKey GEN_AI_AGENT_ID = AttributeKey.stringKey("gen_ai.agent.id"); + static final AttributeKey GEN_AI_AGENT_THREAD_ID = AttributeKey.stringKey("gen_ai.agent.thread_id"); + static final AttributeKey GEN_AI_CONVERSATION_ID = AttributeKey.stringKey("gen_ai.conversation.id"); + + // Session config attribute keys (tracked on connect span) + static final AttributeKey GEN_AI_SYSTEM_INSTRUCTIONS = AttributeKey.stringKey("gen_ai.system_instructions"); + static final AttributeKey GEN_AI_REQUEST_TEMPERATURE = AttributeKey.stringKey("gen_ai.request.temperature"); + static final AttributeKey GEN_AI_REQUEST_MAX_OUTPUT_TOKENS 
+ = AttributeKey.stringKey("gen_ai.request.max_output_tokens"); + static final AttributeKey GEN_AI_VOICE_INPUT_SAMPLE_RATE + = AttributeKey.longKey("gen_ai.voice.input_sample_rate"); + static final AttributeKey GEN_AI_REQUEST_TOOLS = AttributeKey.stringKey("gen_ai.request.tools"); + + // Response attribute keys + static final AttributeKey GEN_AI_RESPONSE_ID = AttributeKey.stringKey("gen_ai.response.id"); + static final AttributeKey GEN_AI_RESPONSE_FINISH_REASONS + = AttributeKey.stringKey("gen_ai.response.finish_reasons"); + + // Rate limit event keys + static final AttributeKey GEN_AI_VOICE_RATE_LIMITS = AttributeKey.stringKey("gen_ai.voice.rate_limits"); + static final String GEN_AI_VOICE_RATE_LIMITS_UPDATED = "gen_ai.voice.rate_limits.updated"; + + // Span event names + static final String GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"; + static final String GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"; + static final String GEN_AI_VOICE_ERROR = "gen_ai.voice.error"; + + // Event attribute keys + static final AttributeKey EVENT_CONTENT = AttributeKey.stringKey("gen_ai.event.content"); + static final AttributeKey ERROR_CODE = AttributeKey.stringKey("error.code"); + static final AttributeKey ERROR_MESSAGE = AttributeKey.stringKey("error.message"); + + // Operation name values + static final String OPERATION_CONNECT = "connect"; + static final String OPERATION_SEND = "send"; + static final String OPERATION_RECV = "recv"; + static final String OPERATION_CLOSE = "close"; + + // Raw event type constants + static final String EVENT_TYPE_UNKNOWN = "unknown"; + static final String EVENT_TYPE_SESSION_UPDATE = ClientEventType.SESSION_UPDATE.toString(); + static final String EVENT_TYPE_RATE_LIMITS_UPDATED = "rate_limits.updated"; + + // Content recording configuration + private static final ConfigurationProperty CAPTURE_MESSAGE_CONTENT + = ConfigurationPropertyBuilder.ofBoolean("azure.tracing.gen_ai.content_recording_enabled") + 
.environmentVariableName("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") + .systemPropertyName("azure.tracing.gen_ai.content_recording_enabled") + .shared(true) + .defaultValue(false) + .build(); + + private final Tracer tracer; + private final io.opentelemetry.api.metrics.Meter meter; + private final boolean captureContent; + private final String serverAddress; + private final int serverPort; + private final String model; + + // Session lifetime span and its OTel context (for parenting child spans) + private final AtomicReference connectSpan = new AtomicReference<>(); + private final AtomicReference connectContext = new AtomicReference<>(); + + // Session-level counters (thread-safe) + private final AtomicLong turnCount = new AtomicLong(0); + private final AtomicLong interruptionCount = new AtomicLong(0); + private final AtomicLong audioBytesSent = new AtomicLong(0); + private final AtomicLong audioBytesReceived = new AtomicLong(0); + + // First-token latency tracking + private final AtomicLong responseCreateTimestampNanos = new AtomicLong(0); + private final AtomicLong firstTokenLatencyMs = new AtomicLong(-1); + + // Session attributes discovered during the session + private final AtomicReference sessionId = new AtomicReference<>(); + private final AtomicReference inputAudioFormat = new AtomicReference<>(); + private final AtomicReference outputAudioFormat = new AtomicReference<>(); + + // Agent attributes - from AgentSessionConfig + session.created response + private volatile String agentName; + private volatile String agentVersion; + private volatile String agentProjectName; + private volatile String clientConversationId; // from AgentSessionConfig + private final AtomicReference agentId = new AtomicReference<>(); + private final AtomicReference agentThreadId = new AtomicReference<>(); + private final AtomicReference conversationId = new AtomicReference<>(); // from server + + // Session config attributes - tracked from session.update events + private 
volatile String systemInstructions; + private volatile String requestTemperature; + private volatile String requestMaxOutputTokens; + private volatile Long inputAudioSamplingRate; + private volatile String requestTools; + + // Last response metadata - accumulated for connect span (Python parity) + private final AtomicReference lastResponseId = new AtomicReference<>(); + private final AtomicReference lastFinishReasons = new AtomicReference<>(); + + /** + * Creates a VoiceLiveTracer. + * + * @param tracer The OpenTelemetry Tracer instance (may be a no-op tracer). + * @param meter The OpenTelemetry Meter instance (may be a no-op meter). + * @param endpoint The WebSocket endpoint URI. + * @param model The model name. + * @param captureContentOverride Optional override for content recording (null = use env var). + */ + VoiceLiveTracer(Tracer tracer, io.opentelemetry.api.metrics.Meter meter, URI endpoint, String model, + Boolean captureContentOverride) { + this.tracer = tracer; + this.meter = meter; + this.model = model; + + if (endpoint != null) { + this.serverAddress = endpoint.getHost(); + this.serverPort + = endpoint.getPort() == -1 ? ("wss".equals(endpoint.getScheme()) ? 443 : 80) : endpoint.getPort(); + } else { + this.serverAddress = null; + this.serverPort = -1; + } + + if (captureContentOverride != null) { + this.captureContent = captureContentOverride; + } else { + this.captureContent = Configuration.getGlobalConfiguration().get(CAPTURE_MESSAGE_CONTENT); + } + } + + // ============================================================================ + // Connect Span (session lifetime) + // ============================================================================ + + /** + * Starts the parent "connect" span for the session lifetime. + */ + void startConnectSpan() { + String spanName = model != null ? 
"connect " + model : "connect"; + + SpanBuilder spanBuilder = tracer.spanBuilder(spanName) + .setSpanKind(SpanKind.CLIENT) + .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .setAttribute(GEN_AI_OPERATION_NAME, OPERATION_CONNECT) + .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE) + .setAttribute(GEN_AI_PROVIDER_NAME, GEN_AI_PROVIDER_NAME_VALUE); + + if (model != null) { + spanBuilder.setAttribute(GEN_AI_REQUEST_MODEL, model); + } + if (serverAddress != null) { + spanBuilder.setAttribute(SERVER_ADDRESS, serverAddress); + if (serverPort != -1) { + spanBuilder.setAttribute(SERVER_PORT, (long) serverPort); + } + } + + Span span = spanBuilder.startSpan(); + Context ctx = Context.current().with(span); + connectSpan.set(span); + connectContext.set(ctx); + } + + /** + * Starts the parent "connect" span for the session lifetime with agent configuration. + * + * @param config The agent session configuration. + */ + void startConnectSpan(com.azure.ai.voicelive.models.AgentSessionConfig config) { + // Store agent config for apply to connect span on close + if (config != null) { + this.agentName = config.getAgentName(); + this.agentVersion = config.getAgentVersion(); + this.agentProjectName = config.getProjectName(); + this.clientConversationId = config.getConversationId(); + } + + // Start the base connect span + startConnectSpan(); + } + + /** + * Ends the connect span, flushing session-level counters as attributes. + * + * @param error The error that caused the session to close, or null. 
 */
    void endConnectSpan(Throwable error) {
        // getAndSet guards against double-ending: only the first caller (close path or
        // error path) observes a non-null span.
        Span span = connectSpan.getAndSet(null);
        connectContext.set(null);
        if (span == null) {
            return;
        }

        // Flush session-level counters
        String sid = sessionId.get();
        if (sid != null) {
            span.setAttribute(GEN_AI_VOICE_SESSION_ID, sid);
        }
        String inFormat = inputAudioFormat.get();
        if (inFormat != null) {
            span.setAttribute(GEN_AI_VOICE_INPUT_AUDIO_FORMAT, inFormat);
        }
        String outFormat = outputAudioFormat.get();
        if (outFormat != null) {
            span.setAttribute(GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT, outFormat);
        }
        span.setAttribute(GEN_AI_VOICE_TURN_COUNT, turnCount.get());
        span.setAttribute(GEN_AI_VOICE_INTERRUPTION_COUNT, interruptionCount.get());
        span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_SENT, audioBytesSent.get());
        span.setAttribute(GEN_AI_VOICE_AUDIO_BYTES_RECEIVED, audioBytesReceived.get());

        // -1 is the "never observed" sentinel, so only record non-negative latencies.
        long latency = firstTokenLatencyMs.get();
        if (latency >= 0) {
            span.setAttribute(GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS, (double) latency);
        }

        // Flush agent attributes
        if (agentName != null) {
            span.setAttribute(GEN_AI_AGENT_NAME, agentName);
        }
        if (agentVersion != null) {
            span.setAttribute(GEN_AI_AGENT_VERSION, agentVersion);
        }
        if (agentProjectName != null) {
            span.setAttribute(GEN_AI_AGENT_PROJECT_NAME, agentProjectName);
        }
        String aid = agentId.get();
        if (aid != null) {
            span.setAttribute(GEN_AI_AGENT_ID, aid);
        }
        String tid = agentThreadId.get();
        if (tid != null) {
            span.setAttribute(GEN_AI_AGENT_THREAD_ID, tid);
        }
        // Server-assigned conversation id wins; fall back to the id supplied in the
        // client-side agent config when the server never reported one.
        String cid = conversationId.get();
        if (cid == null) {
            cid = clientConversationId;
        }
        if (cid != null) {
            span.setAttribute(GEN_AI_CONVERSATION_ID, cid);
        }

        // Flush session config attributes
        if (systemInstructions != null) {
            span.setAttribute(GEN_AI_SYSTEM_INSTRUCTIONS, systemInstructions);
        }
        if (requestTemperature != null) {
            span.setAttribute(GEN_AI_REQUEST_TEMPERATURE, requestTemperature);
        }
        if (requestMaxOutputTokens != null) {
            span.setAttribute(GEN_AI_REQUEST_MAX_OUTPUT_TOKENS, requestMaxOutputTokens);
        }
        if (inputAudioSamplingRate != null) {
            span.setAttribute(GEN_AI_VOICE_INPUT_SAMPLE_RATE, inputAudioSamplingRate);
        }
        if (requestTools != null) {
            span.setAttribute(GEN_AI_REQUEST_TOOLS, requestTools);
        }
        // Last response metadata accumulated during the session (Python SDK parity).
        String rid = lastResponseId.get();
        if (rid != null) {
            span.setAttribute(GEN_AI_RESPONSE_ID, rid);
        }
        String fr = lastFinishReasons.get();
        if (fr != null) {
            span.setAttribute(GEN_AI_RESPONSE_FINISH_REASONS, fr);
        }

        if (error != null) {
            span.setStatus(StatusCode.ERROR, error.getMessage());
            span.recordException(error);
            span.setAttribute(ERROR_TYPE, error.getClass().getCanonicalName());
        }

        span.end();
    }

    // ============================================================================
    // Common child span builder
    // ============================================================================

    /**
     * Creates a SpanBuilder with all common attributes for child spans, matching Python SDK parity:
     * gen_ai.system, gen_ai.operation.name, az.namespace, gen_ai.provider.name,
     * server.address, server.port, gen_ai.request.model, gen_ai.voice.session_id,
     * gen_ai.conversation.id (when available).
 */
    private SpanBuilder childSpanBuilder(String spanName, String operationName) {
        SpanBuilder builder = tracer.spanBuilder(spanName)
            .setSpanKind(SpanKind.CLIENT)
            // Parent to the session-lifetime connect span so every operation nests under it.
            .setParent(connectContext.get())
            .setAttribute(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
            .setAttribute(GEN_AI_OPERATION_NAME, operationName)
            .setAttribute(AZ_NAMESPACE, AZ_NAMESPACE_VALUE)
            .setAttribute(GEN_AI_PROVIDER_NAME, GEN_AI_PROVIDER_NAME_VALUE);

        if (model != null) {
            builder.setAttribute(GEN_AI_REQUEST_MODEL, model);
        }
        if (serverAddress != null) {
            builder.setAttribute(SERVER_ADDRESS, serverAddress);
            if (serverPort != -1) {
                builder.setAttribute(SERVER_PORT, (long) serverPort);
            }
        }
        String sid = sessionId.get();
        if (sid != null) {
            builder.setAttribute(GEN_AI_VOICE_SESSION_ID, sid);
        }
        // Server-assigned conversation id wins; fall back to the client-configured one.
        String cid = conversationId.get();
        if (cid == null) {
            cid = clientConversationId;
        }
        if (cid != null) {
            builder.setAttribute(GEN_AI_CONVERSATION_ID, cid);
        }
        return builder;
    }

    // ============================================================================
    // Send Span
    // ============================================================================

    /**
     * Traces a send operation. Creates a child span of the connect span.
     *
     * @param event The client event being sent.
     * @param jsonPayload The serialized JSON payload.
     */
    void traceSend(ClientEvent event, String jsonPayload) {
        // A null connect context means tracing is inactive for this session; bail out cheaply.
        Context parentCtx = connectContext.get();
        if (parentCtx == null) {
            return;
        }

        String eventType = event.getType() != null ?
 event.getType().toString() : EVENT_TYPE_UNKNOWN;
        String spanName = OPERATION_SEND + " " + eventType;

        Span span
            = childSpanBuilder(spanName, OPERATION_SEND).setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType).startSpan();

        try {
            if (span.isRecording()) {
                if (jsonPayload != null) {
                    span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) jsonPayload.length());
                }

                // Track function call output attributes on the span
                // (fully-qualified names: these model types are not imported in this file).
                if (event instanceof com.azure.ai.voicelive.models.ClientEventConversationItemCreate) {
                    com.azure.ai.voicelive.models.ClientEventConversationItemCreate createEvent
                        = (com.azure.ai.voicelive.models.ClientEventConversationItemCreate) event;
                    String prevItemId = createEvent.getPreviousItemId();
                    if (prevItemId != null) {
                        span.setAttribute(GEN_AI_VOICE_PREVIOUS_ITEM_ID, prevItemId);
                    }
                    com.azure.ai.voicelive.models.ConversationRequestItem item = createEvent.getItem();
                    if (item instanceof com.azure.ai.voicelive.models.FunctionCallOutputItem) {
                        String callId = ((com.azure.ai.voicelive.models.FunctionCallOutputItem) item).getCallId();
                        if (callId != null) {
                            span.setAttribute(GEN_AI_VOICE_CALL_ID, callId);
                        }
                    }
                }

                // Add span event; the payload body is attached only when content recording is on.
                AttributesBuilder eventAttrs = Attributes.builder()
                    .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
                    .put(GEN_AI_VOICE_EVENT_TYPE, eventType);
                if (captureContent && jsonPayload != null) {
                    eventAttrs.put(EVENT_CONTENT, jsonPayload);
                }
                span.addEvent(GEN_AI_INPUT_MESSAGES, eventAttrs.build());

                // Track session-level counters from sent events
                trackSendCounters(event, jsonPayload);
            }
        } finally {
            // Always end the span, even if attribute extraction throws.
            span.end();
        }
    }

    // ============================================================================
    // Recv Span
    // ============================================================================

    /**
     * Traces a recv operation. Creates a child span of the connect span.
     *
     * @param update The parsed session update event.
     * @param rawPayload The raw JSON payload string (for message size and content recording).
     */
    void traceRecv(SessionUpdate update, String rawPayload) {
        // A null connect context means tracing is inactive for this session.
        Context parentCtx = connectContext.get();
        if (parentCtx == null) {
            return;
        }

        // Skip high-volume text/transcript delta events to reduce telemetry noise.
        // These carry incremental fragments with no counters to track.
        // Matches Python SDK's _DELTA_SKIP_EVENT_TYPES.
        if (update instanceof SessionUpdateResponseTextDelta
            || update instanceof SessionUpdateResponseAudioTranscriptDelta) {
            return;
        }

        String eventType = update.getType() != null ? update.getType().toString() : EVENT_TYPE_UNKNOWN;
        String spanName = OPERATION_RECV + " " + eventType;

        Span span
            = childSpanBuilder(spanName, OPERATION_RECV).setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType).startSpan();

        try {
            if (span.isRecording()) {
                if (rawPayload != null) {
                    span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) rawPayload.length());
                }

                // Track per-message token usage from response.done
                trackRecvTokenUsage(update, span);

                // Track response metadata (id, conversation_id, finish_reasons)
                trackResponseMetadata(update, span);

                // Track item_id, call_id, output_index, response_id on recv spans
                trackRecvItemAttributes(update, span);

                // Add span event for output messages
                AttributesBuilder eventAttrs = Attributes.builder()
                    .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE)
                    .put(GEN_AI_VOICE_EVENT_TYPE, eventType);
                if (captureContent && rawPayload != null) {
                    eventAttrs.put(EVENT_CONTENT, rawPayload);
                }
                span.addEvent(GEN_AI_OUTPUT_MESSAGES, eventAttrs.build());

                // Track error events — add event but don't set error status
                trackErrorEvents(update, span);

                // Track session-level counters from received events
                trackRecvCounters(update);
            }
        } finally {
            span.end();
        }
    }

    // ============================================================================
    // Close Span
    //
============================================================================ + + /** + * Traces the close operation. + */ + void traceClose() { + Context parentCtx = connectContext.get(); + if (parentCtx == null) { + return; + } + + Span span = childSpanBuilder(OPERATION_CLOSE, OPERATION_CLOSE).startSpan(); + span.end(); + } + + /** + * Traces a raw receive operation. Used when a message fails to parse or for raw events. + * + * @param rawPayload The raw JSON payload string. + */ + void traceRecvRaw(String rawPayload) { + Context parentCtx = connectContext.get(); + if (parentCtx == null) { + return; + } + + // Try to extract event type from raw JSON + String eventType = extractEventType(rawPayload); + + Span span = childSpanBuilder(OPERATION_RECV + " " + eventType, OPERATION_RECV) + .setAttribute(GEN_AI_VOICE_EVENT_TYPE, eventType) + .startSpan(); + + try { + if (span.isRecording()) { + if (rawPayload != null) { + span.setAttribute(GEN_AI_VOICE_MESSAGE_SIZE, (long) rawPayload.length()); + if (captureContent) { + AttributesBuilder eventAttrs = Attributes.builder() + .put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE) + .put(GEN_AI_VOICE_EVENT_TYPE, eventType); + eventAttrs.put(EVENT_CONTENT, rawPayload); + span.addEvent(GEN_AI_OUTPUT_MESSAGES, eventAttrs.build()); + } + + // Handle rate_limits.updated events - add a special span event with rate limit info + if (EVENT_TYPE_RATE_LIMITS_UPDATED.equals(eventType)) { + String rateLimitsJson = extractRateLimits(rawPayload); + if (rateLimitsJson != null) { + Attributes rateLimitAttrs = Attributes.of(GEN_AI_VOICE_RATE_LIMITS, rateLimitsJson); + span.addEvent(GEN_AI_VOICE_RATE_LIMITS_UPDATED, rateLimitAttrs); + } + } + } + } + } finally { + span.end(); + } + } + + /** + * Extracts the event type from a raw JSON payload. + */ + private static String extractEventType(String rawPayload) { + if (rawPayload == null) { + return EVENT_TYPE_UNKNOWN; + } + // Simple extraction: find "type":"..." 
in JSON
+        int typeIndex = rawPayload.indexOf("\"type\":");
+        if (typeIndex < 0) {
+            return EVENT_TYPE_UNKNOWN;
+        }
+        int startQuote = rawPayload.indexOf('"', typeIndex + 7);
+        if (startQuote < 0) {
+            return EVENT_TYPE_UNKNOWN;
+        }
+        int endQuote = rawPayload.indexOf('"', startQuote + 1);
+        if (endQuote < 0) {
+            return EVENT_TYPE_UNKNOWN;
+        }
+        return rawPayload.substring(startQuote + 1, endQuote);
+    }
+
+    /**
+     * Extracts the rate_limits array JSON string from a rate_limits.updated payload.
+     */
+    private static String extractRateLimits(String rawPayload) {
+        if (rawPayload == null) {
+            return null;
+        }
+        int rateLimitsKey = rawPayload.indexOf("\"rate_limits\":");
+        if (rateLimitsKey < 0) {
+            return null;
+        }
+        int arrayStart = rawPayload.indexOf('[', rateLimitsKey);
+        if (arrayStart < 0) {
+            return null;
+        }
+        // Find matching closing bracket
+        int depth = 0;
+        for (int i = arrayStart; i < rawPayload.length(); i++) {
+            char c = rawPayload.charAt(i);
+            if (c == '[' || c == '{') {
+                depth++;
+            } else if (c == ']' || c == '}') {
+                depth--;
+                if (depth == 0) {
+                    return rawPayload.substring(arrayStart, i + 1);
+                }
+            }
+        }
+        return rawPayload.substring(arrayStart);
+    }
+
+    // ============================================================================
+    // Counter Tracking
+    // ============================================================================
+
+    private void trackSendCounters(ClientEvent event, String jsonPayload) {
+        // Track audio bytes sent from input_audio_buffer.append
+        if (event instanceof ClientEventInputAudioBufferAppend) {
+            ClientEventInputAudioBufferAppend appendEvent = (ClientEventInputAudioBufferAppend) event;
+            String audio = appendEvent.getAudio();
+            if (audio != null) {
+                try {
+                    byte[] decoded = Base64.getDecoder().decode(audio);
+                    audioBytesSent.addAndGet(decoded.length);
+                } catch (IllegalArgumentException e) {
+                    LOGGER.atVerbose().log("Failed to decode audio for byte counting", e);
+                }
+            }
+        }
+
+        // Track response.create for first-token latency
+ if (event instanceof ClientEventResponseCreate) { + responseCreateTimestampNanos.set(System.nanoTime()); + firstTokenLatencyMs.set(-1); // Reset for this response + } + + // Track interruptions from response.cancel + if (event instanceof ClientEventResponseCancel) { + interruptionCount.incrementAndGet(); + } + + // Track audio format from session.update (from typed object) + if (event instanceof ClientEventSessionUpdate) { + ClientEventSessionUpdate sessionUpdate = (ClientEventSessionUpdate) event; + VoiceLiveSessionOptions session = sessionUpdate.getSession(); + if (session != null) { + if (session.getInputAudioFormat() != null) { + inputAudioFormat.set(session.getInputAudioFormat().toString()); + } + if (session.getOutputAudioFormat() != null) { + outputAudioFormat.set(session.getOutputAudioFormat().toString()); + } + if (session.getInputAudioSamplingRate() != null) { + inputAudioSamplingRate = session.getInputAudioSamplingRate().longValue(); + } + } + } + + // Parse session config from session.update JSON for tracking on connect span + if (jsonPayload != null && EVENT_TYPE_SESSION_UPDATE.equals(extractEventType(jsonPayload))) { + parseAndTrackSessionUpdateConfig(jsonPayload); + } + } + + /** + * Parses session.update JSON payload and tracks config attributes on the connect span. + * These are accumulated and applied when the connect span ends. 
+ */ + private void parseAndTrackSessionUpdateConfig(String jsonPayload) { + if (jsonPayload == null) { + return; + } + // Extract instruction + String instructions = extractJsonStringField(jsonPayload, "instructions"); + if (instructions != null) { + this.systemInstructions = instructions; + } + // Extract temperature + String temperature = extractJsonNumberField(jsonPayload, "temperature"); + if (temperature != null) { + this.requestTemperature = temperature; + } + // Extract max_response_output_tokens + String maxTokens = extractJsonNumberField(jsonPayload, "max_response_output_tokens"); + if (maxTokens != null) { + this.requestMaxOutputTokens = maxTokens; + } + // Extract input_audio_sampling_rate + String samplingRate = extractJsonNumberField(jsonPayload, "input_audio_sampling_rate"); + if (samplingRate != null) { + try { + this.inputAudioSamplingRate = Long.parseLong(samplingRate.split("\\.")[0]); + } catch (NumberFormatException e) { + // ignore + } + } + // Extract input_audio_format + String inAudioFormat = extractJsonStringField(jsonPayload, "input_audio_format"); + if (inAudioFormat != null) { + this.inputAudioFormat.set(inAudioFormat); + } + // Extract output_audio_format + String outAudioFormat = extractJsonStringField(jsonPayload, "output_audio_format"); + if (outAudioFormat != null) { + this.outputAudioFormat.set(outAudioFormat); + } + // Extract tools array + int toolsIndex = jsonPayload.indexOf("\"tools\":"); + if (toolsIndex >= 0) { + int arrayStart = jsonPayload.indexOf('[', toolsIndex); + if (arrayStart >= 0) { + int depth = 0; + for (int i = arrayStart; i < jsonPayload.length(); i++) { + char c = jsonPayload.charAt(i); + if (c == '[' || c == '{') { + depth++; + } else if (c == ']' || c == '}') { + depth--; + if (depth == 0) { + this.requestTools = jsonPayload.substring(arrayStart, i + 1); + break; + } + } + } + } + } + } + + /** + * Extracts a string field value from a JSON string without fully parsing it. 
+ */ + private static String extractJsonStringField(String json, String fieldName) { + String key = "\"" + fieldName + "\":\""; + int idx = json.indexOf(key); + if (idx < 0) { + return null; + } + int start = idx + key.length(); + int end = start; + while (end < json.length()) { + char c = json.charAt(end); + if (c == '\\') { + end += 2; // skip escaped char + } else if (c == '"') { + break; + } else { + end++; + } + } + if (end > start) { + return json.substring(start, end).replace("\\\"", "\"").replace("\\\\", "\\"); + } + return null; + } + + /** + * Extracts a numeric field value from a JSON string without fully parsing it. + */ + private static String extractJsonNumberField(String json, String fieldName) { + String key = "\"" + fieldName + "\":"; + int idx = json.indexOf(key); + if (idx < 0) { + return null; + } + int start = idx + key.length(); + // Skip whitespace + while (start < json.length() && json.charAt(start) == ' ') { + start++; + } + if (start >= json.length()) { + return null; + } + int end = start; + while (end < json.length()) { + char c = json.charAt(end); + if (Character.isDigit(c) || c == '.' 
|| c == '-') { + end++; + } else { + break; + } + } + if (end > start) { + return json.substring(start, end); + } + return null; + } + + private void trackRecvCounters(SessionUpdate update) { + // Track session ID from session.created / session.updated + if (update instanceof SessionUpdateSessionCreated) { + VoiceLiveSessionResponse session = ((SessionUpdateSessionCreated) update).getSession(); + if (session != null && session.getId() != null) { + sessionId.set(session.getId()); + } + // Track agent info if present + if (session != null && session.getAgent() != null) { + com.azure.ai.voicelive.models.RespondingAgentOptions agent = session.getAgent(); + if (agent.getAgentId() != null) { + agentId.set(agent.getAgentId()); + } + if (agent.getThreadId() != null) { + agentThreadId.set(agent.getThreadId()); + } + } + } + if (update instanceof SessionUpdateSessionUpdated) { + VoiceLiveSessionResponse session = ((SessionUpdateSessionUpdated) update).getSession(); + if (session != null && session.getId() != null) { + sessionId.set(session.getId()); + } + } + + // Track audio format from session.created / session.updated responses + if (update instanceof SessionUpdateSessionCreated) { + updateAudioFormatsFromResponse(((SessionUpdateSessionCreated) update).getSession()); + } + if (update instanceof SessionUpdateSessionUpdated) { + updateAudioFormatsFromResponse(((SessionUpdateSessionUpdated) update).getSession()); + } + + // Track audio bytes received from response.audio.delta + if (update instanceof SessionUpdateResponseAudioDelta) { + SessionUpdateResponseAudioDelta audioDelta = (SessionUpdateResponseAudioDelta) update; + byte[] delta = audioDelta.getDelta(); + if (delta != null) { + audioBytesReceived.addAndGet(delta.length); + } + + // First-token latency: measure from response.create to first audio delta + long createTs = responseCreateTimestampNanos.get(); + if (createTs > 0 && firstTokenLatencyMs.get() < 0) { + long elapsed = (System.nanoTime() - createTs) / 1_000_000; 
+ firstTokenLatencyMs.compareAndSet(-1, elapsed); + } + } + + // Track turn count from response.done + if (update instanceof SessionUpdateResponseDone) { + turnCount.incrementAndGet(); + + // Track conversation_id from response.done + SessionResponse response = ((SessionUpdateResponseDone) update).getResponse(); + if (response != null && response.getConversationId() != null) { + conversationId.set(response.getConversationId()); + } + } + + // Track conversation_id from response.created + if (update instanceof SessionUpdateResponseCreated) { + SessionResponse response = ((SessionUpdateResponseCreated) update).getResponse(); + if (response != null && response.getConversationId() != null) { + conversationId.set(response.getConversationId()); + } + } + } + + /** + * Tracks response metadata (id, conversation_id, finish_reasons) on the recv span. + */ + private void trackResponseMetadata(SessionUpdate update, Span span) { + if (update instanceof SessionUpdateResponseDone) { + SessionResponse response = ((SessionUpdateResponseDone) update).getResponse(); + if (response != null) { + if (response.getId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, response.getId()); + } + if (response.getConversationId() != null) { + span.setAttribute(GEN_AI_CONVERSATION_ID, response.getConversationId()); + } + if (response.getStatus() != null) { + String fr = "[\"" + response.getStatus().toString() + "\"]"; + span.setAttribute(GEN_AI_RESPONSE_FINISH_REASONS, fr); + lastFinishReasons.set(fr); + } + // Accumulate last response.id for connect span + lastResponseId.set(response.getId()); + } + } + if (update instanceof SessionUpdateResponseCreated) { + SessionResponse response = ((SessionUpdateResponseCreated) update).getResponse(); + if (response != null) { + if (response.getId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, response.getId()); + } + if (response.getConversationId() != null) { + span.setAttribute(GEN_AI_CONVERSATION_ID, response.getConversationId()); + } + } + } + 
} + + /** + * Tracks item_id, call_id, output_index, and response_id on recv spans for + * conversation.item.created, response.output_item.added/done, + * and response.function_call_arguments.delta/done events. + */ + private void trackRecvItemAttributes(SessionUpdate update, Span span) { + // conversation.item.created -> item_id + if (update instanceof SessionUpdateConversationItemCreated) { + SessionUpdateConversationItemCreated itemCreated = (SessionUpdateConversationItemCreated) update; + if (itemCreated.getItem() != null && itemCreated.getItem().getId() != null) { + span.setAttribute(GEN_AI_VOICE_ITEM_ID, itemCreated.getItem().getId()); + } + } + // response.output_item.added -> item_id, response_id, output_index, conversation_id + if (update instanceof SessionUpdateResponseOutputItemAdded) { + SessionUpdateResponseOutputItemAdded outputAdded = (SessionUpdateResponseOutputItemAdded) update; + if (outputAdded.getItem() != null && outputAdded.getItem().getId() != null) { + span.setAttribute(GEN_AI_VOICE_ITEM_ID, outputAdded.getItem().getId()); + } + if (outputAdded.getResponseId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, outputAdded.getResponseId()); + } + span.setAttribute(GEN_AI_VOICE_OUTPUT_INDEX, (long) outputAdded.getOutputIndex()); + } + // response.output_item.done -> item_id, response_id, output_index + if (update instanceof SessionUpdateResponseOutputItemDone) { + SessionUpdateResponseOutputItemDone outputDone = (SessionUpdateResponseOutputItemDone) update; + if (outputDone.getItem() != null && outputDone.getItem().getId() != null) { + span.setAttribute(GEN_AI_VOICE_ITEM_ID, outputDone.getItem().getId()); + } + if (outputDone.getResponseId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, outputDone.getResponseId()); + } + span.setAttribute(GEN_AI_VOICE_OUTPUT_INDEX, (long) outputDone.getOutputIndex()); + } + // response.function_call_arguments.delta -> item_id, response_id, call_id, output_index + if (update instanceof 
SessionUpdateResponseFunctionCallArgumentsDelta) { + SessionUpdateResponseFunctionCallArgumentsDelta delta + = (SessionUpdateResponseFunctionCallArgumentsDelta) update; + if (delta.getItemId() != null) { + span.setAttribute(GEN_AI_VOICE_ITEM_ID, delta.getItemId()); + } + if (delta.getResponseId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, delta.getResponseId()); + } + if (delta.getCallId() != null) { + span.setAttribute(GEN_AI_VOICE_CALL_ID, delta.getCallId()); + } + span.setAttribute(GEN_AI_VOICE_OUTPUT_INDEX, (long) delta.getOutputIndex()); + } + // response.function_call_arguments.done -> item_id, response_id, call_id, output_index + if (update instanceof SessionUpdateResponseFunctionCallArgumentsDone) { + SessionUpdateResponseFunctionCallArgumentsDone done + = (SessionUpdateResponseFunctionCallArgumentsDone) update; + if (done.getItemId() != null) { + span.setAttribute(GEN_AI_VOICE_ITEM_ID, done.getItemId()); + } + if (done.getResponseId() != null) { + span.setAttribute(GEN_AI_RESPONSE_ID, done.getResponseId()); + } + if (done.getCallId() != null) { + span.setAttribute(GEN_AI_VOICE_CALL_ID, done.getCallId()); + } + span.setAttribute(GEN_AI_VOICE_OUTPUT_INDEX, (long) done.getOutputIndex()); + } + } + + private void trackRecvTokenUsage(SessionUpdate update, Span span) { + if (update instanceof SessionUpdateResponseDone) { + SessionResponse response = ((SessionUpdateResponseDone) update).getResponse(); + if (response != null) { + ResponseTokenStatistics usage = response.getUsage(); + if (usage != null) { + span.setAttribute(GEN_AI_USAGE_INPUT_TOKENS, (long) usage.getInputTokens()); + span.setAttribute(GEN_AI_USAGE_OUTPUT_TOKENS, (long) usage.getOutputTokens()); + } + } + } + } + + private void trackErrorEvents(SessionUpdate update, Span span) { + if (update instanceof SessionUpdateError) { + SessionUpdateError errorUpdate = (SessionUpdateError) update; + SessionUpdateErrorDetails errorDetails = errorUpdate.getError(); + if (errorDetails != null) { + // 
Note: We add an error event but do NOT set error status on the span. + // Error status is only set on endConnectSpan() when passed a Throwable. + AttributesBuilder errorAttrs = Attributes.builder().put(GEN_AI_SYSTEM, GEN_AI_SYSTEM_VALUE); + if (errorDetails.getCode() != null) { + errorAttrs.put(ERROR_CODE, errorDetails.getCode()); + } + if (errorDetails.getMessage() != null) { + errorAttrs.put(ERROR_MESSAGE, errorDetails.getMessage()); + } + span.addEvent(GEN_AI_VOICE_ERROR, errorAttrs.build()); + } + } + } + + private void updateAudioFormatsFromResponse(VoiceLiveSessionResponse session) { + if (session == null) { + return; + } + if (session.getInputAudioFormat() != null) { + inputAudioFormat.set(session.getInputAudioFormat().toString()); + } + if (session.getOutputAudioFormat() != null) { + outputAudioFormat.set(session.getOutputAudioFormat().toString()); + } + if (session.getInputAudioSamplingRate() != null) { + inputAudioSamplingRate = session.getInputAudioSamplingRate().longValue(); + } + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryAttributeKeys.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryAttributeKeys.java new file mode 100644 index 000000000000..4f0b90bccd5a --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryAttributeKeys.java @@ -0,0 +1,168 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.voicelive.telemetry; + +import io.opentelemetry.api.common.AttributeKey; + +/** + * OpenTelemetry attribute keys and semantic convention constants for VoiceLive tracing. + */ +public final class VoiceLiveTelemetryAttributeKeys { + + /** Attribute key for Azure namespace. 
*/
+    public static final AttributeKey<String> AZ_NAMESPACE = AttributeKey.stringKey("az.namespace");
+    /** Azure namespace value for Cognitive Services. */
+    public static final String AZ_NAMESPACE_VALUE = "Microsoft.CognitiveServices";
+
+    /** Attribute key for the GenAI system identifier. */
+    public static final AttributeKey<String> GEN_AI_SYSTEM = AttributeKey.stringKey("gen_ai.system");
+    /** GenAI system value for VoiceLive. */
+    public static final String GEN_AI_SYSTEM_VALUE = "az.ai.voicelive";
+
+    /** Attribute key for the GenAI provider name. */
+    public static final AttributeKey<String> GEN_AI_PROVIDER_NAME = AttributeKey.stringKey("gen_ai.provider.name");
+    /** GenAI provider value. */
+    public static final String GEN_AI_PROVIDER_VALUE = "microsoft.foundry";
+
+    /** Attribute key for the GenAI operation name. */
+    public static final AttributeKey<String> GEN_AI_OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name");
+    /** Attribute key for the GenAI request model. */
+    public static final AttributeKey<String> GEN_AI_REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model");
+    /** Attribute key for the GenAI response ID. */
+    public static final AttributeKey<String> GEN_AI_RESPONSE_ID = AttributeKey.stringKey("gen_ai.response.id");
+    /** Attribute key for the GenAI response finish reasons. */
+    public static final AttributeKey<String> GEN_AI_RESPONSE_FINISH_REASONS
+        = AttributeKey.stringKey("gen_ai.response.finish_reasons");
+    /** Attribute key for GenAI input token usage. */
+    public static final AttributeKey<Long> GEN_AI_USAGE_INPUT_TOKENS
+        = AttributeKey.longKey("gen_ai.usage.input_tokens");
+    /** Attribute key for GenAI output token usage. */
+    public static final AttributeKey<Long> GEN_AI_USAGE_OUTPUT_TOKENS
+        = AttributeKey.longKey("gen_ai.usage.output_tokens");
+    /** Attribute key for the GenAI conversation ID. */
+    public static final AttributeKey<String> GEN_AI_CONVERSATION_ID = AttributeKey.stringKey("gen_ai.conversation.id");
+    /** Attribute key for GenAI system instructions.
*/
+    public static final AttributeKey<String> GEN_AI_SYSTEM_INSTRUCTIONS
+        = AttributeKey.stringKey("gen_ai.system_instructions");
+    /** Attribute key for GenAI request temperature. */
+    public static final AttributeKey<String> GEN_AI_REQUEST_TEMPERATURE
+        = AttributeKey.stringKey("gen_ai.request.temperature");
+    /** Attribute key for GenAI request max output tokens. */
+    public static final AttributeKey<String> GEN_AI_REQUEST_MAX_OUTPUT_TOKENS
+        = AttributeKey.stringKey("gen_ai.request.max_output_tokens");
+    /** Attribute key for GenAI request tools. */
+    public static final AttributeKey<String> GEN_AI_REQUEST_TOOLS = AttributeKey.stringKey("gen_ai.request.tools");
+
+    /** Attribute key for GenAI agent name. */
+    public static final AttributeKey<String> GEN_AI_AGENT_NAME = AttributeKey.stringKey("gen_ai.agent.name");
+    /** Attribute key for GenAI agent ID. */
+    public static final AttributeKey<String> GEN_AI_AGENT_ID = AttributeKey.stringKey("gen_ai.agent.id");
+    /** Attribute key for GenAI agent thread ID. */
+    public static final AttributeKey<String> GEN_AI_AGENT_THREAD_ID = AttributeKey.stringKey("gen_ai.agent.thread_id");
+    /** Attribute key for GenAI agent version. */
+    public static final AttributeKey<String> GEN_AI_AGENT_VERSION = AttributeKey.stringKey("gen_ai.agent.version");
+    /** Attribute key for GenAI agent project name. */
+    public static final AttributeKey<String> GEN_AI_AGENT_PROJECT_NAME
+        = AttributeKey.stringKey("gen_ai.agent.project_name");
+
+    /** Attribute key for server address. */
+    public static final AttributeKey<String> SERVER_ADDRESS = AttributeKey.stringKey("server.address");
+    /** Attribute key for server port. */
+    public static final AttributeKey<Long> SERVER_PORT = AttributeKey.longKey("server.port");
+
+    /** Attribute key for voice session ID. */
+    public static final AttributeKey<String> GEN_AI_VOICE_SESSION_ID
+        = AttributeKey.stringKey("gen_ai.voice.session_id");
+    /** Attribute key for voice call ID.
*/
+    public static final AttributeKey<String> GEN_AI_VOICE_CALL_ID = AttributeKey.stringKey("gen_ai.voice.call_id");
+    /** Attribute key for voice item ID. */
+    public static final AttributeKey<String> GEN_AI_VOICE_ITEM_ID = AttributeKey.stringKey("gen_ai.voice.item_id");
+    /** Attribute key for the previous voice item ID. */
+    public static final AttributeKey<String> GEN_AI_VOICE_PREVIOUS_ITEM_ID
+        = AttributeKey.stringKey("gen_ai.voice.previous_item_id");
+    /** Attribute key for voice output index. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_OUTPUT_INDEX
+        = AttributeKey.longKey("gen_ai.voice.output_index");
+    /** Attribute key for voice input sample rate. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_INPUT_SAMPLE_RATE
+        = AttributeKey.longKey("gen_ai.voice.input_sample_rate");
+    /** Attribute key for voice input audio format. */
+    public static final AttributeKey<String> GEN_AI_VOICE_INPUT_AUDIO_FORMAT
+        = AttributeKey.stringKey("gen_ai.voice.input_audio_format");
+    /** Attribute key for voice output audio format. */
+    public static final AttributeKey<String> GEN_AI_VOICE_OUTPUT_AUDIO_FORMAT
+        = AttributeKey.stringKey("gen_ai.voice.output_audio_format");
+    /** Attribute key for voice turn count. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_TURN_COUNT = AttributeKey.longKey("gen_ai.voice.turn_count");
+    /** Attribute key for voice interruption count. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_INTERRUPTION_COUNT
+        = AttributeKey.longKey("gen_ai.voice.interruption_count");
+    /** Attribute key for voice audio bytes sent. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_AUDIO_BYTES_SENT
+        = AttributeKey.longKey("gen_ai.voice.audio_bytes_sent");
+    /** Attribute key for voice audio bytes received. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_AUDIO_BYTES_RECEIVED
+        = AttributeKey.longKey("gen_ai.voice.audio_bytes_received");
+    /** Attribute key for voice first token latency in milliseconds.
*/
+    public static final AttributeKey<Double> GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS
+        = AttributeKey.doubleKey("gen_ai.voice.first_token_latency_ms");
+    /** Attribute key for voice event type. */
+    public static final AttributeKey<String> GEN_AI_VOICE_EVENT_TYPE
+        = AttributeKey.stringKey("gen_ai.voice.event_type");
+    /** Attribute key for voice message size. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_MESSAGE_SIZE
+        = AttributeKey.longKey("gen_ai.voice.message_size");
+
+    /** Attribute key for MCP server label. */
+    public static final AttributeKey<String> GEN_AI_VOICE_MCP_SERVER_LABEL
+        = AttributeKey.stringKey("gen_ai.voice.mcp.server_label");
+    /** Attribute key for MCP tool name. */
+    public static final AttributeKey<String> GEN_AI_VOICE_MCP_TOOL_NAME
+        = AttributeKey.stringKey("gen_ai.voice.mcp.tool_name");
+    /** Attribute key for MCP approval request ID. */
+    public static final AttributeKey<String> GEN_AI_VOICE_MCP_APPROVAL_REQUEST_ID
+        = AttributeKey.stringKey("gen_ai.voice.mcp.approval_request_id");
+    /** Attribute key for MCP approval flag. */
+    public static final AttributeKey<Boolean> GEN_AI_VOICE_MCP_APPROVE
+        = AttributeKey.booleanKey("gen_ai.voice.mcp.approve");
+    /** Attribute key for MCP call count. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_MCP_CALL_COUNT
+        = AttributeKey.longKey("gen_ai.voice.mcp.call_count");
+    /** Attribute key for MCP list tools count. */
+    public static final AttributeKey<Long> GEN_AI_VOICE_MCP_LIST_TOOLS_COUNT
+        = AttributeKey.longKey("gen_ai.voice.mcp.list_tools_count");
+
+    /** Attribute key for error type. */
+    public static final AttributeKey<String> ERROR_TYPE = AttributeKey.stringKey("error.type");
+    /** Attribute key for error message. */
+    public static final AttributeKey<String> ERROR_MESSAGE = AttributeKey.stringKey("error.message");
+    /** Attribute key for error code. */
+    public static final AttributeKey<String> ERROR_CODE = AttributeKey.stringKey("error.code");
+    /** Attribute key for GenAI event content.
*/
+    public static final AttributeKey<String> GEN_AI_EVENT_CONTENT = AttributeKey.stringKey("gen_ai.event.content");
+    /** Attribute key for voice rate limits. */
+    public static final AttributeKey<String> GEN_AI_VOICE_RATE_LIMITS
+        = AttributeKey.stringKey("gen_ai.voice.rate_limits");
+    /** Attribute key for GenAI token type. */
+    public static final AttributeKey<String> GEN_AI_TOKEN_TYPE = AttributeKey.stringKey("gen_ai.token.type");
+
+    /** Span event name for GenAI input messages. */
+    public static final String GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages";
+    /** Span event name for GenAI output messages. */
+    public static final String GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages";
+    /** Span event name for GenAI system instructions. */
+    public static final String GEN_AI_SYSTEM_INSTRUCTIONS_EVENT = "gen_ai.system.instructions";
+    /** Span event name for voice error. */
+    public static final String GEN_AI_VOICE_ERROR = "gen_ai.voice.error";
+    /** Span event name for voice rate limits updated. */
+    public static final String GEN_AI_VOICE_RATE_LIMITS_UPDATED = "gen_ai.voice.rate_limits.updated";
+
+    /** Metric name for GenAI client operation duration. */
+    public static final String GEN_AI_CLIENT_OPERATION_DURATION = "gen_ai.client.operation.duration";
+    /** Metric name for GenAI client token usage. */
+    public static final String GEN_AI_CLIENT_TOKEN_USAGE = "gen_ai.client.token.usage";
+
+    private VoiceLiveTelemetryAttributeKeys() {
+    }
+}
diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryUtils.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryUtils.java
new file mode 100644
index 000000000000..123a1bb7c3b0
--- /dev/null
+++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/VoiceLiveTelemetryUtils.java
@@ -0,0 +1,193 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.voicelive.telemetry;
+
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.logging.ClientLogger;
+import com.azure.core.util.serializer.TypeReference;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Utility methods for VoiceLive telemetry payload parsing and conversion.
+ */
+public final class VoiceLiveTelemetryUtils {
+
+    private static final ClientLogger LOGGER = new ClientLogger(VoiceLiveTelemetryUtils.class);
+    private static final TypeReference<Map<String, Object>> MAP_TYPE = new TypeReference<Map<String, Object>>() {
+    };
+
+    /**
+     * Parse a JSON string into a Map.
+     *
+     * @param json The JSON string to parse.
+     * @return A Map representation of the JSON, or empty map if json is null/empty/invalid.
+     */
+    public static Map<String, Object> parseJsonObject(String json) {
+        if (json == null || json.isEmpty()) {
+            return Collections.emptyMap();
+        }
+        try {
+            Map<String, Object> parsed = BinaryData.fromString(json).toObject(MAP_TYPE);
+            return parsed == null ? Collections.emptyMap() : parsed;
+        } catch (RuntimeException ex) {
+            LOGGER.atVerbose().log("Failed to parse telemetry payload as JSON", ex);
+            return Collections.emptyMap();
+        }
+    }
+
+    /**
+     * Get a nested map value.
+     *
+     * @param value The object to cast.
+     * @return A Map, or null if value is not a map.
+     */
+    @SuppressWarnings("unchecked")
+    public static Map<String, Object> getMap(Object value) {
+        return value instanceof Map ? (Map<String, Object>) value : null;
+    }
+
+    /**
+     * Convert value to String.
+     *
+     * @param value The object to convert.
+     * @return String representation or null if value is null.
+     */
+    public static String getString(Object value) {
+        return value == null ? null : String.valueOf(value);
+    }
+
+    /**
+     * Convert value to Long.
+     *
+     * @param value The object to convert.
+     * @return Long value or null if value is not a number or parseable long.
+ */ + public static Long getLong(Object value) { + if (value instanceof Number) { + return ((Number) value).longValue(); + } + if (value instanceof String) { + try { + return Long.parseLong(value.toString()); + } catch (NumberFormatException ignored) { + return null; + } + } + return null; + } + + /** + * Convert value to Boolean. + * + * @param value The object to convert. + * @return Boolean value or null if value is not boolean or parseable. + */ + public static Boolean getBoolean(Object value) { + if (value instanceof Boolean) { + return (Boolean) value; + } + if (value instanceof String) { + return Boolean.parseBoolean(value.toString()); + } + return null; + } + + /** + * Parse a JSON value string (handles nested objects). + * + * @param value The JSON value string. + * @return The parsed object, or the original string if parse fails. + */ + public static Object parseJsonValue(String value) { + if (value == null) { + return null; + } + try { + return BinaryData.fromString(value).toObject(Object.class); + } catch (RuntimeException ex) { + return value; + } + } + + /** + * Serialize an object to JSON. + * + * @param value The object to serialize. + * @return JSON string representation. + */ + public static String serializeJson(Object value) { + return BinaryData.fromObject(value).toString(); + } + + /** + * Return the first non-blank string. + * + * @param first First candidate. + * @param second Second candidate. + * @return The first non-blank string, or null. + */ + public static String firstNonBlank(String first, String second) { + if (first != null && !first.trim().isEmpty()) { + return first; + } + if (second != null && !second.trim().isEmpty()) { + return second; + } + return null; + } + + /** + * Get the byte length of a string payload in UTF-8. + * + * @param payload The payload string. + * @return The UTF-8 byte length, or 0 if null. + */ + public static long messageSize(String payload) { + return payload == null ? 
0 : payload.getBytes(StandardCharsets.UTF_8).length; + } + + /** + * Decode a base64 string and return the byte length. + * + * @param value The base64 string. + * @return The decoded byte length, or UTF-8 byte length if decode fails. + */ + public static long base64Length(String value) { + if (value == null) { + return 0; + } + try { + return Base64.getDecoder().decode(value).length; + } catch (IllegalArgumentException ex) { + LOGGER.atVerbose().log("Failed to decode base64 payload for telemetry byte counting", ex); + return value.getBytes(StandardCharsets.UTF_8).length; + } + } + + /** + * Get the default port for a scheme. + * + * @param scheme The URI scheme (http, https, ws, wss, etc). + * @return The default port (80 for http/ws, 443 for others). + */ + public static long defaultPort(String scheme) { + if (scheme == null) { + return 443; + } + String normalized = scheme.toLowerCase(Locale.ROOT); + if ("http".equals(normalized) || "ws".equals(normalized)) { + return 80; + } + return 443; + } + + // Prevent instantiation + private VoiceLiveTelemetryUtils() { + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/package-info.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/package-info.java new file mode 100644 index 000000000000..0d467fb8ec2f --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/com/azure/ai/voicelive/telemetry/package-info.java @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +/** + * Package containing telemetry support classes for VoiceLive. 
+ */ +package com.azure.ai.voicelive.telemetry; diff --git a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java index 6e615f120e95..dd36de7b02f6 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java +++ b/sdk/voicelive/azure-ai-voicelive/src/main/java/module-info.java @@ -9,6 +9,8 @@ requires io.netty.buffer; requires io.netty.codec.http; requires io.netty.resolver; + requires transitive io.opentelemetry.api; + requires io.opentelemetry.context; exports com.azure.ai.voicelive; exports com.azure.ai.voicelive.models; diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java index b2663e485df7..91b8cd5f1018 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java @@ -5,6 +5,8 @@ import com.azure.ai.voicelive.models.AgentSessionConfig; import com.azure.ai.voicelive.models.AudioEchoCancellation; +import com.azure.ai.voicelive.models.AudioInputTranscriptionOptions; +import com.azure.ai.voicelive.models.AudioInputTranscriptionOptionsModel; import com.azure.ai.voicelive.models.AudioNoiseReduction; import com.azure.ai.voicelive.models.AudioNoiseReductionType; import com.azure.ai.voicelive.models.AzureStandardVoice; @@ -175,7 +177,11 @@ private static void runAssistant() { // Setup shutdown hook for graceful termination Runtime.getRuntime().addShutdownHook(new Thread(() -> { - System.out.println("\nVoice assistant shut down. Goodbye!"); + try { + System.out.println("\nVoice assistant shut down. 
Goodbye!"); + } catch (Exception ignored) { + // jansi/Maven may throw during shutdown + } assistant.shutdown(); })); @@ -314,7 +320,10 @@ private void configureSession() { .setTurnDetection(turnDetection) // Audio quality enhancements .setInputAudioEchoCancellation(new AudioEchoCancellation()) - .setInputAudioNoiseReduction(new AudioNoiseReduction(AudioNoiseReductionType.AZURE_DEEP_NOISE_SUPPRESSION)); + .setInputAudioNoiseReduction(new AudioNoiseReduction(AudioNoiseReductionType.NEAR_FIELD)) + .setInputAudioTranscription( + new AudioInputTranscriptionOptions(AudioInputTranscriptionOptionsModel.AZURE_SPEECH) + ); // Uncomment to enable interim responses // .setInterimResponse(BinaryData.fromObject(interimResponseConfig)); diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/FunctionCallingSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/FunctionCallingSample.java index bdc51e92da0c..f638021f6381 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/FunctionCallingSample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/FunctionCallingSample.java @@ -6,6 +6,8 @@ import com.azure.ai.voicelive.models.AudioEchoCancellation; import com.azure.ai.voicelive.models.AudioInputTranscriptionOptions; import com.azure.ai.voicelive.models.AudioInputTranscriptionOptionsModel; +import com.azure.ai.voicelive.models.AudioNoiseReduction; +import com.azure.ai.voicelive.models.AudioNoiseReductionType; import com.azure.ai.voicelive.models.ClientEventConversationItemCreate; import com.azure.ai.voicelive.models.ClientEventResponseCreate; import com.azure.ai.voicelive.models.ClientEventSessionUpdate; @@ -88,7 +90,6 @@ public final class FunctionCallingSample { // Service configuration - private static final String DEFAULT_API_VERSION = "2025-10-01"; private static final String DEFAULT_MODEL = "gpt-4o-realtime-preview"; private static final String 
ENV_ENDPOINT = "AZURE_VOICELIVE_ENDPOINT"; private static final String ENV_API_KEY = "AZURE_VOICELIVE_API_KEY"; @@ -129,15 +130,14 @@ public static void main(String[] args) { System.out.println("🎤️ Voice Assistant with Function Calling - Azure VoiceLive SDK"); System.out.println(separator); - // Create client - KeyCredential credential = new KeyCredential(apiKey); - VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() - .endpoint(endpoint) - .credential(credential) - .serviceVersion(VoiceLiveServiceVersion.V2025_10_01) - .buildAsyncClient(); - try { + // Create client + KeyCredential credential = new KeyCredential(apiKey); + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(credential) + .buildAsyncClient(); + runFunctionCallingSession(client); } catch (Exception e) { System.err.println("Error running function calling sample: " + e.getMessage()); @@ -256,6 +256,7 @@ private static ClientEventSessionUpdate createSessionConfigWithFunctions() { .setOutputAudioFormat(OutputAudioFormat.PCM16) .setInputAudioSamplingRate(SAMPLE_RATE) .setInputAudioEchoCancellation(new AudioEchoCancellation()) + .setInputAudioNoiseReduction(new AudioNoiseReduction(AudioNoiseReductionType.NEAR_FIELD)) .setTurnDetection(new ServerVadTurnDetection() .setThreshold(0.5) .setPrefixPaddingMs(300) diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/MCPSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/MCPSample.java index 1f4d5bb9d4e2..fcfb6a65151f 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/MCPSample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/MCPSample.java @@ -75,8 +75,8 @@ * *

Try asking:

*
    - *
  • "Can you summary github repo azure sdk for java?"
  • - *
  • "Can you summary azure docs about voice live?"
  • + *
  • "Can you summarize the Azure SDK for Java GitHub repo?"
  • + *
  • "Can you summarize Azure docs about VoiceLive?"
  • *
*/ public final class MCPSample { diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java index b006cc42c402..fb15da397063 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/ReadmeSamples.java @@ -3,6 +3,7 @@ package com.azure.ai.voicelive; +import com.azure.ai.voicelive.models.AgentSessionConfig; import com.azure.ai.voicelive.models.AudioEchoCancellation; import com.azure.ai.voicelive.models.AudioInputTranscriptionOptions; import com.azure.ai.voicelive.models.AudioInputTranscriptionOptionsModel; @@ -13,7 +14,12 @@ import com.azure.ai.voicelive.models.ClientEventResponseCreate; import com.azure.ai.voicelive.models.FunctionCallOutputItem; import com.azure.ai.voicelive.models.ItemType; +import com.azure.ai.voicelive.models.MCPApprovalResponseRequestItem; +import com.azure.ai.voicelive.models.MCPApprovalType; +import com.azure.ai.voicelive.models.MCPServer; import com.azure.ai.voicelive.models.ResponseFunctionCallItem; +import com.azure.ai.voicelive.models.ResponseMCPApprovalRequestItem; +import com.azure.ai.voicelive.models.SessionResponseItem; import com.azure.ai.voicelive.models.SessionUpdateConversationItemCreated; import com.azure.ai.voicelive.models.VoiceLiveFunctionDefinition; import com.azure.ai.voicelive.models.AzurePersonalVoice; @@ -30,6 +36,7 @@ import com.azure.ai.voicelive.models.SessionUpdate; import com.azure.ai.voicelive.models.SessionUpdateError; import com.azure.ai.voicelive.models.SessionUpdateResponseAudioDelta; +import com.azure.ai.voicelive.models.SessionUpdateResponseOutputItemDone; import com.azure.ai.voicelive.models.SessionUpdateSessionUpdated; import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; import com.azure.core.credential.AzureKeyCredential; @@ -38,6 
+45,7 @@ import com.azure.identity.AzureCliCredentialBuilder; import com.azure.identity.DefaultAzureCredentialBuilder; import com.fasterxml.jackson.databind.ObjectMapper; +import io.opentelemetry.api.OpenTelemetry; import reactor.core.publisher.Mono; import java.io.IOException; @@ -402,6 +410,120 @@ public void functionCalling() { // END: com.azure.ai.voicelive.functioncalling } + /** + * Tracing: automatic via GlobalOpenTelemetry + */ + public void tracingAutomatic() { + // BEGIN: com.azure.ai.voicelive.tracing.automatic + // No special configuration needed — tracing is picked up from GlobalOpenTelemetry + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildAsyncClient(); + // END: com.azure.ai.voicelive.tracing.automatic + } + + /** + * Tracing: explicit OpenTelemetry instance + */ + public void tracingExplicit() { + OpenTelemetry otel = OpenTelemetry.noop(); // Replace with your configured OpenTelemetry SDK instance + + // BEGIN: com.azure.ai.voicelive.tracing.explicit + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .buildAsyncClient(); + // END: com.azure.ai.voicelive.tracing.explicit + } + + /** + * Tracing: enable content recording + */ + public void tracingContentRecording() { + OpenTelemetry otel = OpenTelemetry.noop(); // Replace with your configured OpenTelemetry SDK instance + + // BEGIN: com.azure.ai.voicelive.tracing.contentrecording + // Enable content recording to capture full JSON payloads in span events + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .openTelemetry(otel) + .enableContentRecording(true) + .buildAsyncClient(); + + // Or via environment variables (no code changes needed): + // OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true + // (legacy fallback) 
AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED=true + // END: com.azure.ai.voicelive.tracing.contentrecording + } + + /** + * Sample for MCP tool integration + */ + public void mcpToolIntegration() { + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new AzureKeyCredential(apiKey)) + .buildAsyncClient(); + + VoiceLiveSessionAsyncClient session = client.startSession("gpt-4o-realtime-preview").block(); + + // BEGIN: com.azure.ai.voicelive.mcp + // Configure MCP servers as tools + MCPServer mcpServer = new MCPServer("deepwiki", "https://mcp.deepwiki.com/mcp") + .setRequireApproval(BinaryData.fromObject(MCPApprovalType.ALWAYS)); + + VoiceLiveSessionOptions options = new VoiceLiveSessionOptions() + .setTools(Arrays.asList(mcpServer)) + .setInstructions("You have access to external tools via MCP. Use them when asked."); + + // Handle MCP approval requests in your event loop + session.receiveEvents().subscribe(event -> { + if (event instanceof SessionUpdateResponseOutputItemDone) { + SessionUpdateResponseOutputItemDone itemDone = (SessionUpdateResponseOutputItemDone) event; + SessionResponseItem item = itemDone.getItem(); + + if (item instanceof ResponseMCPApprovalRequestItem) { + // Approve the tool call + ResponseMCPApprovalRequestItem approvalRequest = (ResponseMCPApprovalRequestItem) item; + MCPApprovalResponseRequestItem approval = new MCPApprovalResponseRequestItem( + approvalRequest.getId(), true); + ClientEventConversationItemCreate createItem = new ClientEventConversationItemCreate() + .setItem(approval); + session.sendEvent(createItem).subscribe(); + session.sendEvent(new ClientEventResponseCreate()).subscribe(); + } + } + }); + // END: com.azure.ai.voicelive.mcp + } + + /** + * Sample for Azure AI Foundry agent session + */ + public void agentSession() { + // BEGIN: com.azure.ai.voicelive.agentsession + // Configure agent connection + AgentSessionConfig agentConfig = new AgentSessionConfig("my-agent", "my-project") 
+ .setAgentVersion("1.0"); + + // Start session with agent config (uses DefaultAzureCredential) + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new DefaultAzureCredentialBuilder().build()) + .buildAsyncClient(); + + client.startSession(agentConfig) + .flatMap(session -> { + session.receiveEvents().subscribe(event -> handleEvent(event)); + return Mono.just(session); + }) + .block(); + // END: com.azure.ai.voicelive.agentsession + } + // Helper methods private Object parametersSchema = new Object(); diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java index 133b4ae21f32..670a9e8ea7cc 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/VoiceAssistantSample.java @@ -89,7 +89,6 @@ public final class VoiceAssistantSample { // Service configuration constants - private static final String DEFAULT_API_VERSION = "2025-10-01"; private static final String DEFAULT_MODEL = "gpt-4o-realtime-preview"; // Environment variable names @@ -364,7 +363,6 @@ public static void main(String[] args) { for (String arg : args) { if ("--use-token-credential".equals(arg)) { useTokenCredential = true; - break; } } @@ -465,7 +463,6 @@ private static void runVoiceAssistant(String endpoint, KeyCredential credential) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) .credential(credential) - .serviceVersion(VoiceLiveServiceVersion.V2025_10_01) .buildAsyncClient(); runVoiceAssistantWithClient(client); @@ -485,7 +482,6 @@ private static void runVoiceAssistant(String endpoint, TokenCredential credentia VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) .credential(credential) - 
.serviceVersion(VoiceLiveServiceVersion.V2025_10_01) .buildAsyncClient(); runVoiceAssistantWithClient(client); @@ -542,7 +538,11 @@ private static void runVoiceAssistantWithClient(VoiceLiveAsyncClient client) { // Install shutdown hook for graceful cleanup Runtime.getRuntime().addShutdownHook(new Thread(() -> { - System.out.println("\n🛑 Shutting down gracefully..."); + try { + System.out.println("\n🛑 Shutting down gracefully..."); + } catch (Exception ignored) { + // jansi may have torn down the ANSI output stream already + } audioProcessor.shutdown(); try { session.closeAsync().block(Duration.ofSeconds(5)); diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/ExplicitTracingSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/ExplicitTracingSample.java new file mode 100644 index 000000000000..b879dae5c64d --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/ExplicitTracingSample.java @@ -0,0 +1,236 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.voicelive.telemetry; + +import com.azure.ai.voicelive.VoiceLiveAsyncClient; +import com.azure.ai.voicelive.VoiceLiveClientBuilder; +import com.azure.ai.voicelive.models.ClientEventSessionUpdate; +import com.azure.ai.voicelive.models.InteractionModality; +import com.azure.ai.voicelive.models.SessionUpdateResponseDone; +import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; +import com.azure.core.credential.KeyCredential; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.EventData; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import reactor.core.publisher.Mono; + +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +/** + * Sample demonstrating how to enable OpenTelemetry tracing for VoiceLive sessions. + * + *

This runnable sample shows how to configure OpenTelemetry tracing so that every + * connect, send, recv, and close operation emits spans with voice-specific attributes.

+ * + *

Environment Variables Required:

+ *
    + *
  • {@code AZURE_VOICELIVE_ENDPOINT} — The VoiceLive service endpoint URL
  • + *
  • {@code AZURE_VOICELIVE_API_KEY} — The API key for authentication
  • + *
+ * + *

How to Run:

+ *
{@code
+ * # Basic (no tracing):
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.ExplicitTracingSample" -Dexec.classpathScope=test
+ *
+ * # With OpenTelemetry tracing enabled:
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.ExplicitTracingSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing"
+ *
+ * # With tracing + JSON payload content recording:
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.ExplicitTracingSample" -Dexec.classpathScope=test -Dexec.args="--enable-tracing --enable-recording"
+ * }
+ * + *

Span Structure:

+ * When tracing is enabled, the following span hierarchy is emitted: + *
+ * connect gpt-4o-realtime-preview        ← session lifetime
+ * ├── send session.update                ← send spans include event type
+ * ├── send response.create
+ * ├── recv session.created               ← recv spans include event type
+ * ├── recv response.audio.delta
+ * ├── recv response.done                 ← includes token usage
+ * └── close
+ * 
+ * + *

Session-level Attributes (on connect span):

+ *
    + *
  • {@code gen_ai.voice.session_id} — Voice session ID
  • + *
  • {@code gen_ai.voice.turn_count} — Completed response turns
  • + *
  • {@code gen_ai.voice.interruption_count} — User interruptions
  • + *
  • {@code gen_ai.voice.audio_bytes_sent} — Total audio payload bytes sent
  • + *
  • {@code gen_ai.voice.audio_bytes_received} — Total audio payload bytes received
  • + *
  • {@code gen_ai.voice.first_token_latency_ms} — Time from request to first response token, in milliseconds
  • + *
+ * + *

Alternative: Automatic tracing via GlobalOpenTelemetry

+ *

If the OpenTelemetry Java agent is attached or {@code GlobalOpenTelemetry} is configured, + * tracing works automatically with no builder configuration needed. The client defaults to + * {@code GlobalOpenTelemetry.getOrNoop()}, which is a zero-cost no-op when no SDK is present.

+ * + *

Alternative: Azure Monitor Integration

+ *
{@code
+ * // Replace LoggingSpanExporter with azure-monitor-opentelemetry-exporter:
+ * AzureMonitorExporterBuilder exporter = new AzureMonitorExporterBuilder()
+ *     .connectionString(System.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"));
+ * SdkTracerProvider tracerProvider = SdkTracerProvider.builder()
+ *     .addSpanProcessor(SimpleSpanProcessor.create(exporter.buildTraceExporter()))
+ *     .build();
+ * }
+ */ +public final class ExplicitTracingSample { + + /** + * Main method to run the telemetry sample. + * + * @param args Unused command line arguments + * @throws InterruptedException if the thread is interrupted while waiting + */ + public static void main(String[] args) throws InterruptedException { + // Parse command line arguments + boolean enableTracing = false; + boolean enableRecording = false; + for (String arg : args) { + if ("--enable-tracing".equals(arg)) { + enableTracing = true; + } else if ("--enable-recording".equals(arg)) { + enableRecording = true; + } + } + + String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); + String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); + + if (endpoint == null || apiKey == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + System.err.println(); + System.err.println("Optional flags:"); + System.err.println(" --enable-tracing Enable OpenTelemetry tracing (prints spans to console)"); + System.err.println(" --enable-recording Also capture full JSON payloads in span events"); + return; + } + + // 1. Set up OpenTelemetry tracing if enabled. + // This custom exporter prints both attributes AND span events (where content + // recording payloads appear). The built-in LoggingSpanExporter only prints + // attributes, so recorded content would not be visible with it. + // In production, replace with OtlpGrpcSpanExporter or the Azure Monitor exporter. 
+ SdkTracerProvider tracerProvider = null; + OpenTelemetry otel = null; + if (enableTracing) { + tracerProvider = SdkTracerProvider.builder() + .addSpanProcessor(SimpleSpanProcessor.create(new ConsoleSpanExporter())) + .build(); + otel = OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .build(); + System.out.println("OpenTelemetry tracing enabled (console exporter)"); + if (enableRecording) { + System.out.println("Content recording enabled (JSON payloads will appear in span events)"); + } + } + + // 2. Build client — optionally with tracing and content recording. + // Alternatively, omit .openTelemetry() to use GlobalOpenTelemetry automatically. + VoiceLiveClientBuilder builder = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new KeyCredential(apiKey)); + if (otel != null) { + builder.openTelemetry(otel); + builder.enableContentRecording(enableRecording); + } + VoiceLiveAsyncClient client = builder.buildAsyncClient(); + + System.out.println("Starting traced voice session..."); + + CountDownLatch done = new CountDownLatch(1); + + // 3. Run a short text-mode conversation — all operations are traced automatically. + client.startSession("gpt-4o-realtime-preview") + .flatMap(session -> { + VoiceLiveSessionOptions options = new VoiceLiveSessionOptions() + .setModalities(Arrays.asList(InteractionModality.TEXT)) + .setInstructions("You are a helpful assistant. Be concise."); + + session.receiveEvents() + .subscribe( + event -> { + System.out.println("Event: " + event.getType()); + if (event instanceof SessionUpdateResponseDone) { + session.closeAsync().subscribe(); + done.countDown(); + } + }, + error -> { + System.err.println("Error: " + error.getMessage()); + done.countDown(); + } + ); + + return session.sendEvent(new ClientEventSessionUpdate(options)) + .then(session.startResponse()) + .then(Mono.empty()); + }) + .subscribe(); + + done.await(30, TimeUnit.SECONDS); + + // 4. 
Shut down the tracer provider to flush remaining spans to console. + if (tracerProvider != null) { + tracerProvider.close(); + } + } + + private ExplicitTracingSample() { + } + + /** + * Custom span exporter that prints both span attributes and span events to the console. + * + *

The built-in {@code LoggingSpanExporter} only prints span attributes. When content + * recording is enabled via {@code enableContentRecording(true)}, JSON payloads are captured + * as span events (e.g., {@code gen_ai.input_messages}, {@code gen_ai.output_messages}). + * This exporter makes those events visible in the console output.

+ */ + private static final class ConsoleSpanExporter implements SpanExporter { + + @Override + public CompletableResultCode export(Collection spans) { + for (SpanData span : spans) { + System.out.printf("'%s' : %s %s %s [tracer: %s:%s] %s%n", + span.getName(), + span.getTraceId(), + span.getSpanId(), + span.getKind(), + span.getInstrumentationScopeInfo().getName(), + span.getInstrumentationScopeInfo().getVersion() != null + ? span.getInstrumentationScopeInfo().getVersion() : "", + span.getAttributes()); + + // Print span events (content recording payloads appear here) + for (EventData event : span.getEvents()) { + System.out.printf(" Event '%s': %s%n", event.getName(), event.getAttributes()); + } + } + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/GlobalTracingSample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/GlobalTracingSample.java new file mode 100644 index 000000000000..cc3ea276b479 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/telemetry/GlobalTracingSample.java @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.ai.voicelive.telemetry; + +import com.azure.ai.voicelive.VoiceLiveAsyncClient; +import com.azure.ai.voicelive.VoiceLiveClientBuilder; +import com.azure.ai.voicelive.models.ClientEventSessionUpdate; +import com.azure.ai.voicelive.models.InteractionModality; +import com.azure.ai.voicelive.models.SessionUpdateResponseDone; +import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; +import com.azure.core.credential.KeyCredential; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.EventData; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import reactor.core.publisher.Mono; + +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +/** + * Sample demonstrating automatic tracing via {@code GlobalOpenTelemetry}. + * + *

Unlike {@link ExplicitTracingSample} which passes an explicit {@code OpenTelemetry} instance + * to the builder, this sample registers a global instance with + * {@code OpenTelemetrySdk.builder().buildAndRegisterGlobal()}. The VoiceLive client picks it + * up automatically — no {@code .openTelemetry()} call is needed on the builder.

+ * + *

Environment Variables Required:

+ *
    + *
  • {@code AZURE_VOICELIVE_ENDPOINT} — The VoiceLive service endpoint URL
  • + *
  • {@code AZURE_VOICELIVE_API_KEY} — The API key for authentication
  • + *
+ * + *

How to Run:

+ *
{@code
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.telemetry.GlobalTracingSample" -Dexec.classpathScope=test
+ * }
+ */ +public final class GlobalTracingSample { + + public static void main(String[] args) throws InterruptedException { + String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); + String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); + + if (endpoint == null || apiKey == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + return; + } + + // 1. Register a global OpenTelemetry instance BEFORE building any client. + // In production, you'd use OtlpGrpcSpanExporter or Azure Monitor instead of ConsoleSpanExporter. + // Alternatively, attach the OpenTelemetry Java agent (-javaagent:opentelemetry-javaagent.jar) + // which does this automatically — no code needed at all. + SdkTracerProvider tracerProvider = SdkTracerProvider.builder() + .addSpanProcessor(SimpleSpanProcessor.create(new ConsoleSpanExporter())) + .build(); + OpenTelemetrySdk.builder() + .setTracerProvider(tracerProvider) + .buildAndRegisterGlobal(); // <-- registers into GlobalOpenTelemetry + System.out.println("GlobalOpenTelemetry registered (console exporter)"); + + // 2. Build client WITHOUT .openTelemetry() — it picks up GlobalOpenTelemetry automatically. + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() + .endpoint(endpoint) + .credential(new KeyCredential(apiKey)) + .buildAsyncClient(); + + System.out.println("Starting voice session (automatic tracing)..."); + + CountDownLatch done = new CountDownLatch(1); + + // 3. Run a short text-mode conversation — all operations are traced automatically. + client.startSession("gpt-4o-realtime-preview") + .flatMap(session -> { + VoiceLiveSessionOptions options = new VoiceLiveSessionOptions() + .setModalities(Arrays.asList(InteractionModality.TEXT)) + .setInstructions("You are a helpful assistant. 
Be concise."); + + session.receiveEvents() + .subscribe( + event -> { + System.out.println("Event: " + event.getType()); + if (event instanceof SessionUpdateResponseDone) { + session.closeAsync().subscribe(); + done.countDown(); + } + }, + error -> { + System.err.println("Error: " + error.getMessage()); + done.countDown(); + } + ); + + return session.sendEvent(new ClientEventSessionUpdate(options)) + .then(session.startResponse()) + .then(Mono.empty()); + }) + .subscribe(); + + done.await(30, TimeUnit.SECONDS); + + // 4. Flush remaining spans. + tracerProvider.close(); + } + + private GlobalTracingSample() { + } + + /** + * Minimal console exporter that prints span names, attributes, and events. + */ + private static final class ConsoleSpanExporter implements SpanExporter { + + @Override + public CompletableResultCode export(Collection spans) { + for (SpanData span : spans) { + System.out.printf("'%s' : %s%n", span.getName(), span.getAttributes()); + for (EventData event : span.getEvents()) { + System.out.printf(" Event '%s': %s%n", event.getName(), event.getAttributes()); + } + } + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } + } +} diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java index 5055c71b91e5..5bb5499dfb59 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveClientBuilderTest.java @@ -6,6 +6,7 @@ import com.azure.core.credential.KeyCredential; import com.azure.core.credential.TokenCredential; import 
com.azure.core.test.utils.MockTokenCredential; +import io.opentelemetry.api.OpenTelemetry; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -203,4 +204,56 @@ void testBuilderReturnsBuilder() { assertSame(clientBuilder, clientBuilder.credential(mockKeyCredential)); assertSame(clientBuilder, clientBuilder.serviceVersion(VoiceLiveServiceVersion.V2026_01_01_PREVIEW)); } + + @Test + void testBuilderWithExplicitOpenTelemetry() { + String endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint) + .credential(mockKeyCredential) + .openTelemetry(OpenTelemetry.noop()) + .buildAsyncClient(); + + assertNotNull(client); + }); + } + + @Test + void testBuilderWithNullOpenTelemetryThrows() { + assertThrows(NullPointerException.class, () -> clientBuilder.openTelemetry(null)); + } + + @Test + void testBuilderDefaultsToGlobalOpenTelemetry() { + // When no explicit OpenTelemetry is set, builder should use GlobalOpenTelemetry.getOrNoop() + String endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client + = clientBuilder.endpoint(endpoint).credential(mockKeyCredential).buildAsyncClient(); + + assertNotNull(client); + }); + } + + @Test + void testBuilderOpenTelemetryReturnsBuilder() { + assertSame(clientBuilder, clientBuilder.openTelemetry(OpenTelemetry.noop())); + } + + @Test + void testBuilderWithOpenTelemetryAndContentRecording() { + String endpoint = "https://test.cognitiveservices.azure.com"; + + assertDoesNotThrow(() -> { + VoiceLiveAsyncClient client = clientBuilder.endpoint(endpoint) + .credential(mockKeyCredential) + .openTelemetry(OpenTelemetry.noop()) + .enableContentRecording(true) + .buildAsyncClient(); + + assertNotNull(client); + }); + } } diff --git a/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java 
b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java new file mode 100644 index 000000000000..bfda4ecbe449 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/src/test/java/com/azure/ai/voicelive/VoiceLiveTracerTest.java @@ -0,0 +1,504 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.ai.voicelive; + +import com.azure.ai.voicelive.models.AgentSessionConfig; +import com.azure.ai.voicelive.models.ClientEventConversationItemCreate; +import com.azure.ai.voicelive.models.ClientEventInputAudioBufferAppend; +import com.azure.ai.voicelive.models.ClientEventResponseCancel; +import com.azure.ai.voicelive.models.ClientEventResponseCreate; +import com.azure.ai.voicelive.models.FunctionCallOutputItem; +import com.azure.ai.voicelive.models.SessionUpdate; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.EventData; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.net.URI; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests for {@link VoiceLiveTracer} using OpenTelemetry SDK testing utilities. 
+ */ +class VoiceLiveTracerTest { + + private InMemorySpanExporter spanExporter; + private SdkTracerProvider tracerProvider; + private SdkMeterProvider meterProvider; + private Tracer tracer; + private VoiceLiveTracer voiceLiveTracer; + + @BeforeEach + void setUp() throws Exception { + spanExporter = InMemorySpanExporter.create(); + tracerProvider = SdkTracerProvider.builder().addSpanProcessor(SimpleSpanProcessor.create(spanExporter)).build(); + meterProvider = SdkMeterProvider.builder().build(); + tracer = tracerProvider.get("azure-ai-voicelive", "1.0.0-beta.6"); + Meter meter = meterProvider.get("azure-ai-voicelive"); + URI endpoint = new URI("wss://test.cognitiveservices.azure.com/voice-live/realtime"); + voiceLiveTracer = new VoiceLiveTracer(tracer, meter, endpoint, "gpt-4o-realtime-preview", null); + } + + @AfterEach + void tearDown() { + tracerProvider.close(); + meterProvider.close(); + } + + @Test + void testConnectSpanCreated() { + voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(1, spans.size()); + SpanData span = spans.get(0); + assertEquals("connect gpt-4o-realtime-preview", span.getName()); + assertEquals(SpanKind.CLIENT, span.getKind()); + assertEquals(VoiceLiveTracer.GEN_AI_SYSTEM_VALUE, span.getAttributes().get(VoiceLiveTracer.GEN_AI_SYSTEM)); + assertEquals(VoiceLiveTracer.OPERATION_CONNECT, + span.getAttributes().get(VoiceLiveTracer.GEN_AI_OPERATION_NAME)); + assertEquals(VoiceLiveTracer.AZ_NAMESPACE_VALUE, span.getAttributes().get(VoiceLiveTracer.AZ_NAMESPACE)); + assertEquals(VoiceLiveTracer.GEN_AI_PROVIDER_NAME_VALUE, + span.getAttributes().get(VoiceLiveTracer.GEN_AI_PROVIDER_NAME)); + assertEquals("gpt-4o-realtime-preview", span.getAttributes().get(VoiceLiveTracer.GEN_AI_REQUEST_MODEL)); + assertEquals("test.cognitiveservices.azure.com", span.getAttributes().get(VoiceLiveTracer.SERVER_ADDRESS)); + assertEquals(443L, 
span.getAttributes().get(VoiceLiveTracer.SERVER_PORT).longValue()); + } + + @Test + void testSendSpanCreated() { + voiceLiveTracer.startConnectSpan(); + + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + // Send span finishes first + SpanData sendSpan = spans.get(0); + assertEquals("send response.create", sendSpan.getName()); + assertEquals(SpanKind.CLIENT, sendSpan.getKind()); + assertEquals(VoiceLiveTracer.OPERATION_SEND, + sendSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_OPERATION_NAME)); + assertEquals("response.create", sendSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_EVENT_TYPE)); + + // Send span should be a child of the connect span + SpanData connectSpan = spans.get(1); + assertEquals(connectSpan.getSpanContext().getTraceId(), sendSpan.getSpanContext().getTraceId()); + assertEquals(connectSpan.getSpanContext().getSpanId(), sendSpan.getParentSpanId()); + } + + @Test + void testRecvSpanCreated() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String json = "{\"type\":\"session.created\",\"event_id\":\"event1\"," + + "\"session\":{\"id\":\"session123\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + SpanData recvSpan = spans.get(0); + assertEquals("recv session.created", recvSpan.getName()); + assertEquals(VoiceLiveTracer.OPERATION_RECV, + recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_OPERATION_NAME)); + + // Verify parent-child relationship + SpanData connectSpan = spans.get(1); + assertEquals(connectSpan.getSpanContext().getSpanId(), recvSpan.getParentSpanId()); + 
} + + @Test + void testCloseSpanCreated() { + voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(2, spans.size()); + + SpanData closeSpan = spans.get(0); + assertEquals("close", closeSpan.getName()); + assertEquals(VoiceLiveTracer.OPERATION_CLOSE, + closeSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_OPERATION_NAME)); + } + + @Test + void testSessionCountersOnEndConnectSpan() { + voiceLiveTracer.startConnectSpan(); + + // Simulate a response.cancel (interruption) + ClientEventResponseCancel cancelEvent = new ClientEventResponseCancel(); + voiceLiveTracer.traceSend(cancelEvent, "{\"type\":\"response.cancel\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + // Find the connect span (last to finish) + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(1L, connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_INTERRUPTION_COUNT).longValue()); + } + + @Test + void testResponseCreateTracksLatency() throws Exception { + voiceLiveTracer.startConnectSpan(); + + // Send response.create + ClientEventResponseCreate createEvent = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(createEvent, "{\"type\":\"response.create\"}"); + + // Simulate small delay and receive audio delta + Thread.sleep(10); + String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\"," + + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQID\"}"; + SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson)); + voiceLiveTracer.traceRecv(audioDelta, audioJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + Double latency = connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_FIRST_TOKEN_LATENCY_MS); + 
assertNotNull(latency); + assertTrue(latency >= 0, "Latency should be >= 0, was: " + latency); + } + + @Test + void testAudioBytesTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + // Send audio (base64 of 3 bytes: [1,2,3] = "AQID") + ClientEventInputAudioBufferAppend appendEvent = new ClientEventInputAudioBufferAppend("AQID"); + voiceLiveTracer.traceSend(appendEvent, "{\"type\":\"input_audio_buffer.append\",\"audio\":\"AQID\"}"); + + // Receive audio delta (base64 "AQIDBA==" = 4 bytes) + String audioJson = "{\"type\":\"response.audio.delta\",\"response_id\":\"r1\"," + + "\"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,\"delta\":\"AQIDBA==\"}"; + SessionUpdate audioDelta = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(audioJson)); + voiceLiveTracer.traceRecv(audioDelta, audioJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(3L, connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_AUDIO_BYTES_SENT).longValue()); + assertEquals(4L, + connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_AUDIO_BYTES_RECEIVED).longValue()); + } + + @Test + void testTurnCountTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String doneJson = "{\"type\":\"response.done\",\"event_id\":\"event1\"," + + "\"response\":{\"id\":\"response1\",\"status\":\"completed\",\"output\":[]}}"; + SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson)); + voiceLiveTracer.traceRecv(responseDone, doneJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(1L, connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_TURN_COUNT).longValue()); + } + + @Test + void testTokenUsageOnResponseDone() throws Exception { + 
voiceLiveTracer.startConnectSpan(); + + String doneJson = "{\"type\":\"response.done\",\"event_id\":\"event1\"," + + "\"response\":{\"id\":\"response1\",\"status\":\"completed\",\"output\":[]," + + "\"usage\":{\"total_tokens\":150,\"input_tokens\":100,\"output_tokens\":50," + + "\"input_token_details\":{\"cached_tokens\":0,\"text_tokens\":50,\"audio_tokens\":50}," + + "\"output_token_details\":{\"text_tokens\":25,\"audio_tokens\":25}}}}"; + SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson)); + voiceLiveTracer.traceRecv(responseDone, doneJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + // Recv span is first to finish + SpanData recvSpan = spans.get(0); + assertEquals(100L, recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_USAGE_INPUT_TOKENS).longValue()); + assertEquals(50L, recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_USAGE_OUTPUT_TOKENS).longValue()); + } + + @Test + void testErrorEventTracking() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String errorJson = "{\"type\":\"error\",\"event_id\":\"event1\"," + + "\"error\":{\"type\":\"server_error\",\"code\":\"500\",\"message\":\"Internal error\"}}"; + SessionUpdate errorUpdate = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(errorJson)); + voiceLiveTracer.traceRecv(errorUpdate, errorJson); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData recvSpan = spans.get(0); + + assertEquals(StatusCode.UNSET, recvSpan.getStatus().getStatusCode()); + + List events = recvSpan.getEvents(); + EventData errorEvent = events.stream() + .filter(e -> VoiceLiveTracer.GEN_AI_VOICE_ERROR.equals(e.getName())) + .findFirst() + .orElseThrow(() -> new AssertionError("Expected gen_ai.voice.error event")); + assertEquals("500", errorEvent.getAttributes().get(VoiceLiveTracer.ERROR_CODE)); + assertEquals("Internal error", 
errorEvent.getAttributes().get(VoiceLiveTracer.ERROR_MESSAGE)); + } + + @Test + void testConnectSpanErrorStatus() { + voiceLiveTracer.startConnectSpan(); + voiceLiveTracer.endConnectSpan(new RuntimeException("Connection lost")); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(0); + assertEquals(StatusCode.ERROR, connectSpan.getStatus().getStatusCode()); + assertEquals("Connection lost", connectSpan.getStatus().getDescription()); + assertEquals("java.lang.RuntimeException", connectSpan.getAttributes().get(VoiceLiveTracer.ERROR_TYPE)); + + // Verify exception was recorded + assertTrue(connectSpan.getEvents().stream().anyMatch(e -> "exception".equals(e.getName()))); + } + + @Test + void testSessionIdFromSessionCreated() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String json = "{\"type\":\"session.created\",\"event_id\":\"event1\"," + + "\"session\":{\"id\":\"session456\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals("session456", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_SESSION_ID)); + } + + @Test + void testConnectSpanWithoutModel() throws Exception { + VoiceLiveTracer tracerNoModel = new VoiceLiveTracer(tracer, meterProvider.get("test"), + new URI("wss://test.cognitiveservices.azure.com"), null, null); + + tracerNoModel.startConnectSpan(); + tracerNoModel.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData span = spans.get(0); + assertEquals(VoiceLiveTracer.OPERATION_CONNECT, span.getName()); + assertFalse(span.getAttributes().asMap().containsKey(VoiceLiveTracer.GEN_AI_REQUEST_MODEL)); + } + + @Test + void testParentChildSpanHierarchy() throws Exception { + 
voiceLiveTracer.startConnectSpan(); + + // Send + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}"); + + // Recv + String json = "{\"type\":\"session.created\",\"event_id\":\"event1\"," + + "\"session\":{\"id\":\"session123\",\"model\":\"gpt-4o\"}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + + // Close + voiceLiveTracer.traceClose(); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + assertEquals(4, spans.size()); + + // All child spans share the same trace ID and reference the connect span as parent + SpanData connectSpan = spans.get(spans.size() - 1); + String traceId = connectSpan.getSpanContext().getTraceId(); + String connectSpanId = connectSpan.getSpanContext().getSpanId(); + + for (int i = 0; i < spans.size() - 1; i++) { + SpanData child = spans.get(i); + assertEquals(traceId, child.getSpanContext().getTraceId(), "Child span should be in same trace"); + assertEquals(connectSpanId, child.getParentSpanId(), "Child span should have connect as parent"); + } + } + + @Test + void testBuilderWithOpenTelemetry() { + VoiceLiveAsyncClient client = new VoiceLiveClientBuilder().endpoint("https://test.cognitiveservices.azure.com") + .credential(new com.azure.core.credential.KeyCredential("fake")) + .openTelemetry(io.opentelemetry.api.OpenTelemetry.noop()) + .buildAsyncClient(); + + assertNotNull(client); + } + + @Test + void testSpanEventsContainAttributes() { + voiceLiveTracer.startConnectSpan(); + + ClientEventResponseCreate event = new ClientEventResponseCreate(); + voiceLiveTracer.traceSend(event, "{\"type\":\"response.create\"}"); + + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData sendSpan = spans.get(0); + List events = sendSpan.getEvents(); + assertFalse(events.isEmpty()); + + 
EventData inputEvent = events.stream() + .filter(e -> VoiceLiveTracer.GEN_AI_INPUT_MESSAGES.equals(e.getName())) + .findFirst() + .orElseThrow(() -> new AssertionError("Expected gen_ai.input.messages event")); + assertEquals(VoiceLiveTracer.GEN_AI_SYSTEM_VALUE, + inputEvent.getAttributes().get(VoiceLiveTracer.GEN_AI_SYSTEM)); + assertEquals("response.create", inputEvent.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_EVENT_TYPE)); + } + + @Test + void testResponseDoneTracksConversationAndFinishReason() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String doneJson = "{\"type\":\"response.done\",\"event_id\":\"event1\"," + + "\"response\":{\"id\":\"response1\",\"conversation_id\":\"conversation1\"," + + "\"status\":\"completed\",\"output\":[]}}"; + SessionUpdate responseDone = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(doneJson)); + voiceLiveTracer.traceRecv(responseDone, doneJson); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData recvSpan = spans.get(0); + SpanData connectSpan = spans.get(1); + + assertEquals("response1", recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_RESPONSE_ID)); + assertEquals("conversation1", recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_CONVERSATION_ID)); + assertEquals("[\"completed\"]", recvSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_RESPONSE_FINISH_REASONS)); + assertEquals("conversation1", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_CONVERSATION_ID)); + // Verify accumulated lastResponseId and lastFinishReasons are flushed to the connect span + assertEquals("response1", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_RESPONSE_ID)); + assertEquals("[\"completed\"]", + connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_RESPONSE_FINISH_REASONS)); + } + + @Test + void testFunctionCallOutputSendTracksIds() { + voiceLiveTracer.startConnectSpan(); + + ClientEventConversationItemCreate event + = new 
ClientEventConversationItemCreate().setPreviousItemId("previousItem1") + .setItem(new FunctionCallOutputItem("call123", "{\"ok\":true}")); + voiceLiveTracer.traceSend(event, + "{\"type\":\"conversation.item.create\",\"previous_item_id\":\"previousItem1\"," + + "\"item\":{\"type\":\"function_call_output\",\"call_id\":\"call123\"," + + "\"output\":\"{\\\"ok\\\":true}\"}}"); + + voiceLiveTracer.endConnectSpan(null); + + SpanData sendSpan = spanExporter.getFinishedSpanItems().get(0); + assertEquals("call123", sendSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_CALL_ID)); + assertEquals("previousItem1", sendSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_PREVIOUS_ITEM_ID)); + } + + @Test + void testSessionUpdateTracksConnectSpanConfig() { + voiceLiveTracer.startConnectSpan(); + + String updateJson = "{\"type\":\"session.update\",\"session\":{" + "\"instructions\":\"You are concise.\"," + + "\"temperature\":0.2," + "\"max_response_output_tokens\":256," + "\"input_audio_sampling_rate\":24000," + + "\"input_audio_format\":\"pcm16\"," + "\"output_audio_format\":\"pcm16\"," + + "\"tools\":[{\"type\":\"function\",\"name\":\"get_weather\"}]}}"; + voiceLiveTracer.traceSend(new ClientEventResponseCreate(), updateJson); + voiceLiveTracer.endConnectSpan(null); + + SpanData connectSpan = spanExporter.getFinishedSpanItems().get(1); + assertEquals("You are concise.", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_SYSTEM_INSTRUCTIONS)); + assertEquals("0.2", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_REQUEST_TEMPERATURE)); + assertEquals("256", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_REQUEST_MAX_OUTPUT_TOKENS)); + assertEquals("pcm16", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_INPUT_AUDIO_FORMAT)); + assertEquals(24000L, + connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_INPUT_SAMPLE_RATE).longValue()); + 
assertTrue(connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_REQUEST_TOOLS).contains("get_weather")); + } + + @Test + void testSessionCreatedTracksAgentAttributes() throws Exception { + voiceLiveTracer.startConnectSpan(new AgentSessionConfig("agent-name", "project-name").setAgentVersion("1.2.3") + .setConversationId("clientConversation1")); + + String json = "{\"type\":\"session.created\",\"event_id\":\"event1\",\"session\":{" + + "\"id\":\"session123\",\"input_audio_sampling_rate\":24000," + + "\"agent\":{\"agent_id\":\"agent123\",\"thread_id\":\"thread456\"}}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + voiceLiveTracer.endConnectSpan(null); + + SpanData connectSpan = spanExporter.getFinishedSpanItems().get(1); + assertEquals("agent-name", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_AGENT_NAME)); + assertEquals("1.2.3", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_AGENT_VERSION)); + assertEquals("project-name", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_AGENT_PROJECT_NAME)); + assertEquals("agent123", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_AGENT_ID)); + assertEquals("thread456", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_AGENT_THREAD_ID)); + assertEquals("clientConversation1", connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_CONVERSATION_ID)); + } + + @Test + void testTraceRecvRawRateLimitsUpdated() { + voiceLiveTracer.startConnectSpan(); + + String rawJson + = "{\"type\":\"rate_limits.updated\",\"rate_limits\":[{\"name\":\"requests\",\"remaining\":10}]}"; + voiceLiveTracer.traceRecvRaw(rawJson); + voiceLiveTracer.endConnectSpan(null); + + SpanData recvSpan = spanExporter.getFinishedSpanItems().get(0); + EventData rateLimitEvent = recvSpan.getEvents() + .stream() + .filter(e -> VoiceLiveTracer.GEN_AI_VOICE_RATE_LIMITS_UPDATED.equals(e.getName())) + .findFirst() + .orElseThrow(() -> new 
AssertionError("Expected gen_ai.voice.rate_limits.updated event")); + assertTrue(rateLimitEvent.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_RATE_LIMITS).contains("requests")); + } + + @Test + void testSessionCreatedTracksInputAudioSamplingRate() throws Exception { + voiceLiveTracer.startConnectSpan(); + + String json = "{\"type\":\"session.created\",\"event_id\":\"event1\"," + + "\"session\":{\"id\":\"session789\",\"model\":\"gpt-4o\"," + "\"input_audio_sampling_rate\":24000}}"; + SessionUpdate update = SessionUpdate.fromJson(com.azure.json.JsonProviders.createReader(json)); + voiceLiveTracer.traceRecv(update, json); + voiceLiveTracer.endConnectSpan(null); + + List spans = spanExporter.getFinishedSpanItems(); + SpanData connectSpan = spans.get(spans.size() - 1); + assertEquals(24000L, + connectSpan.getAttributes().get(VoiceLiveTracer.GEN_AI_VOICE_INPUT_SAMPLE_RATE).longValue()); + } +}