From 39564656999d0d09d0dee65f5dca4f1a3e38935b Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Wed, 18 Mar 2026 18:24:17 +0100 Subject: [PATCH 01/12] feat: add metrics and usage to streaming response types --- .../runtime/bedrock_converse_adapter.ts | 18 ++++++++++++++++-- .../conversation_message_history_retriever.ts | 12 ++++++++++-- .../src/conversation/runtime/types.ts | 2 ++ .../amplify/data/resource.ts | 2 ++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts index 1d6f07f83ea..f25f8e124b0 100644 --- a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts +++ b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts @@ -11,6 +11,7 @@ import { Tool, ToolConfiguration, ToolInputSchema, + //TokenUsage, //check if this exists } from '@aws-sdk/client-bedrock-runtime'; import { ConversationTurnEvent, @@ -113,7 +114,7 @@ export class BedrockConverseAdapter { inferenceConfig: inferenceConfiguration, toolConfig, }; - this.logger.info('Sending Bedrock Converse request'); + this.logger.info('Sending Bedrock Converse request'); //converse this.logger.debug('Bedrock Converse request:', converseCommandInput); bedrockResponse = await this.bedrockClient.send( new ConverseCommand(converseCommandInput), @@ -175,6 +176,10 @@ export class BedrockConverseAdapter { let blockIndex = 0; let lastBlockIndex = 0; let stopReason = ''; + let latencyMs = 0; + let inputTokens = 0; + let outputTokens = 0; + let totalTokens = 0; // Accumulates client facing content per turn. // So that upstream can persist full message at the end of the streaming. 
const accumulatedTurnContent: Array = []; @@ -195,7 +200,7 @@ export class BedrockConverseAdapter { bedrockResponse = await this.bedrockClient.send( new ConverseStreamCommand(converseCommandInput), ); - this.logger.info( + this.logger.info( //stream `Received Bedrock Converse Stream response, requestId=${bedrockResponse.$metadata.requestId}`, ); if (!bedrockResponse.stream) { @@ -304,6 +309,11 @@ export class BedrockConverseAdapter { } } else if (chunk.messageStop) { stopReason = chunk.messageStop.stopReason ?? ''; + } else if (chunk.metadata) { + latencyMs = chunk.metadata.metrics.latencyMs; //check this + inputTokens = chunk.metadata.usage.inputTokens; + outputTokens = chunk.metadata.usage.outputTokens; + totalTokens = chunk.metadata.usage.totalTokens; } processedBedrockChunks++; if (processedBedrockChunks % 1000 === 0) { @@ -330,6 +340,8 @@ export class BedrockConverseAdapter { associatedUserMessageId: this.event.currentMessageId, contentBlockIndex: lastBlockIndex, stopReason: stopReason, + metrics: {latencyMs}, + usage: {inputTokens, outputTokens, totalTokens,}, //check this }; return; } @@ -359,6 +371,8 @@ export class BedrockConverseAdapter { associatedUserMessageId: this.event.currentMessageId, contentBlockIndex: lastBlockIndex, stopReason: stopReason, + metrics: {latencyMs}, + usage: {inputTokens, outputTokens, totalTokens,}, //check this }; } diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts index edb77f1c89f..b65f6f5cf38 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts @@ -92,6 +92,14 @@ const messageItemSelectionSet = ` toolUseId } } + metrics { + latencyMs + } + usage { + inputTokens + outputTokens + totalTokens + } `; /** @@ -164,7 +172,7 @@ export class 
ConversationMessageHistoryRetriever { } const aiContext = current.aiContext; const content = aiContext - ? [...current.content, { text: JSON.stringify(aiContext) }] + ? [...current.content, { text: JSON.stringify(aiContext) }]//TODO add usage and latency! but only for the response!! : current.content; acc.push({ role: current.role, content }); @@ -176,7 +184,7 @@ export class ConversationMessageHistoryRetriever { if (correspondingAssistantMessage) { acc.push({ role: correspondingAssistantMessage.role, - content: correspondingAssistantMessage.content, + content: correspondingAssistantMessage.content, //TODO add usage and latency! but only for the response!! }); } return acc; diff --git a/packages/ai-constructs/src/conversation/runtime/types.ts b/packages/ai-constructs/src/conversation/runtime/types.ts index 6cc7da99a52..af48eb9b2f6 100644 --- a/packages/ai-constructs/src/conversation/runtime/types.ts +++ b/packages/ai-constructs/src/conversation/runtime/types.ts @@ -153,6 +153,8 @@ export type StreamingResponseChunk = { | { // turn complete stopReason: string; + metrics: {latencyMs: number}; + usage: {inputTokens: number, outputTokens: number, totalTokens: number}; contentBlockDoneAtIndex?: never; contentBlockText?: never; contentBlockDeltaIndex?: never; diff --git a/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts b/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts index 185780bc0b0..3a457729f25 100644 --- a/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts +++ b/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts @@ -119,6 +119,8 @@ const schema = a.schema({ // when message is complete stopReason: a.string(), + metrics: {latency: a.integer()}, //check this + usage: {inputTokens: a.integer(), outputTokens: a.integer(), totalTokens: a.integer()}, // error errors: a.ref('MockConversationTurnError').array(), 
From 133cdd88c4acbe8c6bb661a58e590ec6829a1133 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Mon, 23 Mar 2026 18:33:26 +0100 Subject: [PATCH 02/12] feat: add debug logging for bedrock stream metadata --- .../runtime/bedrock_converse_adapter.ts | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts index f25f8e124b0..99553e1ae57 100644 --- a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts +++ b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts @@ -200,7 +200,8 @@ export class BedrockConverseAdapter { bedrockResponse = await this.bedrockClient.send( new ConverseStreamCommand(converseCommandInput), ); - this.logger.info( //stream + this.logger.info( + //stream `Received Bedrock Converse Stream response, requestId=${bedrockResponse.$metadata.requestId}`, ); if (!bedrockResponse.stream) { @@ -264,6 +265,7 @@ export class BedrockConverseAdapter { blockDeltaIndex++; } } else if (chunk.contentBlockStop) { + this.logger.debug(`now in chunk.contentBlockStop`); if (toolUseBlock) { if (toolUseInput) { toolUseBlock.toolUse.input = JSON.parse(toolUseInput); @@ -309,11 +311,14 @@ export class BedrockConverseAdapter { } } else if (chunk.messageStop) { stopReason = chunk.messageStop.stopReason ?? ''; + this.logger.debug( + `Bedrock stop reason received: stopReason=${chunk.messageStop.stopReason ?? ''}`, + ); } else if (chunk.metadata) { - latencyMs = chunk.metadata.metrics.latencyMs; //check this - inputTokens = chunk.metadata.usage.inputTokens; - outputTokens = chunk.metadata.usage.outputTokens; - totalTokens = chunk.metadata.usage.totalTokens; + latencyMs = chunk.metadata.metrics?.latencyMs ?? 0; + inputTokens = chunk.metadata.usage?.inputTokens ?? 0; + outputTokens = chunk.metadata.usage?.outputTokens ?? 
0; + totalTokens = chunk.metadata.usage?.totalTokens ?? 0; } processedBedrockChunks++; if (processedBedrockChunks % 1000 === 0) { @@ -340,8 +345,8 @@ export class BedrockConverseAdapter { associatedUserMessageId: this.event.currentMessageId, contentBlockIndex: lastBlockIndex, stopReason: stopReason, - metrics: {latencyMs}, - usage: {inputTokens, outputTokens, totalTokens,}, //check this + metrics: { latencyMs }, + usage: { inputTokens, outputTokens, totalTokens }, //check this }; return; } @@ -371,8 +376,8 @@ export class BedrockConverseAdapter { associatedUserMessageId: this.event.currentMessageId, contentBlockIndex: lastBlockIndex, stopReason: stopReason, - metrics: {latencyMs}, - usage: {inputTokens, outputTokens, totalTokens,}, //check this + metrics: { latencyMs }, + usage: { inputTokens, outputTokens, totalTokens }, //check this }; } From 1373e9a95b29f07c5dfa824894d53e0d34c7a229 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Mon, 23 Mar 2026 18:34:09 +0100 Subject: [PATCH 03/12] feat: add MockMetrics and MockUsage custom types to integration test schema --- .../conversation-handler/amplify/data/resource.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts b/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts index 3a457729f25..c7a9d22f8a1 100644 --- a/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts +++ b/packages/integration-tests/src/test-projects/conversation-handler/amplify/data/resource.ts @@ -91,6 +91,16 @@ const schema = a.schema({ message: a.string(), }), + MockMetrics: a.customType({ + latencyMs: a.integer(), + }), + + MockUsage: a.customType({ + inputTokens: a.integer(), + outputTokens: a.integer(), + totalTokens: a.integer(), + }), + ConversationMessageAssistantResponse: a .model({ conversationId: a.id(), @@ -119,8 +129,8 @@ const schema = 
a.schema({ // when message is complete stopReason: a.string(), - metrics: {latency: a.integer()}, //check this - usage: {inputTokens: a.integer(), outputTokens: a.integer(), totalTokens: a.integer()}, + metrics: a.ref('MockMetrics'), + usage: a.ref('MockUsage'), // error errors: a.ref('MockConversationTurnError').array(), From 14b241b01a4db36ac24c65daae905b350907b276 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Wed, 25 Mar 2026 13:42:23 +0100 Subject: [PATCH 04/12] feat: capture metrics and usage from bedrock stream metadata --- .../ai-constructs/src/conversation/runtime/types.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/types.ts b/packages/ai-constructs/src/conversation/runtime/types.ts index af48eb9b2f6..8e3a6848fc7 100644 --- a/packages/ai-constructs/src/conversation/runtime/types.ts +++ b/packages/ai-constructs/src/conversation/runtime/types.ts @@ -133,6 +133,8 @@ export type StreamingResponseChunk = { contentBlockDoneAtIndex?: never; contentBlockToolUse?: never; stopReason?: never; + metrics?: never; + usage?: never; } | { // end of block. 
applicable to text blocks @@ -141,6 +143,8 @@ export type StreamingResponseChunk = { contentBlockDeltaIndex?: never; contentBlockToolUse?: never; stopReason?: never; + metrics?: never; + usage?: never; } | { // tool use @@ -149,12 +153,14 @@ export type StreamingResponseChunk = { contentBlockText?: never; contentBlockDeltaIndex?: never; stopReason?: never; + metrics?: never; + usage?: never; } | { // turn complete stopReason: string; - metrics: {latencyMs: number}; - usage: {inputTokens: number, outputTokens: number, totalTokens: number}; + metrics: { latencyMs: number }; + usage: { inputTokens: number; outputTokens: number; totalTokens: number }; contentBlockDoneAtIndex?: never; contentBlockText?: never; contentBlockDeltaIndex?: never; From f438076180aaee0138d537cc8c4aca6c932f90f4 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Wed, 25 Mar 2026 13:43:25 +0100 Subject: [PATCH 05/12] feat: serialize metrics and usage as JSON strings for AppSync --- .../conversation_turn_response_sender.ts | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts index 9849f5ecd07..782a56b44ed 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts @@ -126,22 +126,24 @@ export class ConversationTurnResponseSender { }; private createStreamingMutationRequest = (chunk: StreamingResponseChunk) => { - const query = ` - mutation PublishModelResponse($input: ${this.event.responseMutation.inputTypeName}!) { - ${this.event.responseMutation.name}(input: $input) { - ${this.event.responseMutation.selectionSet} - } - } + const query = ` + mutation PublishModelResponse($input: ${this.event.responseMutation.inputTypeName}!) 
{ + ${this.event.responseMutation.name}(input: $input) { + ${this.event.responseMutation.selectionSet} + } + } `; - chunk = { + const serializedChunk = { ...chunk, accumulatedTurnContent: this.serializeContent( chunk.accumulatedTurnContent, ), + ...(chunk.metrics && { metrics: JSON.stringify(chunk.metrics) }), + ...(chunk.usage && { usage: JSON.stringify(chunk.usage) }), }; - const variables: MutationStreamingResponseInput = { - input: chunk, - }; + const variables = { + input: serializedChunk, + } as MutationStreamingResponseInput; return { query, variables }; }; From e430a55644874df11de7d98ede368b6aa74f4298 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Thu, 26 Mar 2026 13:39:44 +0100 Subject: [PATCH 06/12] chore: clean up dev comments --- .../src/conversation/runtime/bedrock_converse_adapter.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts index 99553e1ae57..694634de490 100644 --- a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts +++ b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts @@ -11,7 +11,6 @@ import { Tool, ToolConfiguration, ToolInputSchema, - //TokenUsage, //check if this exists } from '@aws-sdk/client-bedrock-runtime'; import { ConversationTurnEvent, @@ -114,7 +113,7 @@ export class BedrockConverseAdapter { inferenceConfig: inferenceConfiguration, toolConfig, }; - this.logger.info('Sending Bedrock Converse request'); //converse + this.logger.info('Sending Bedrock Converse request'); this.logger.debug('Bedrock Converse request:', converseCommandInput); bedrockResponse = await this.bedrockClient.send( new ConverseCommand(converseCommandInput), @@ -201,7 +200,6 @@ export class BedrockConverseAdapter { new ConverseStreamCommand(converseCommandInput), ); this.logger.info( - //stream `Received Bedrock Converse 
Stream response, requestId=${bedrockResponse.$metadata.requestId}`, ); if (!bedrockResponse.stream) { @@ -265,7 +263,6 @@ export class BedrockConverseAdapter { blockDeltaIndex++; } } else if (chunk.contentBlockStop) { - this.logger.debug(`now in chunk.contentBlockStop`); if (toolUseBlock) { if (toolUseInput) { toolUseBlock.toolUse.input = JSON.parse(toolUseInput); @@ -346,7 +343,7 @@ export class BedrockConverseAdapter { contentBlockIndex: lastBlockIndex, stopReason: stopReason, metrics: { latencyMs }, - usage: { inputTokens, outputTokens, totalTokens }, //check this + usage: { inputTokens, outputTokens, totalTokens }, }; return; } @@ -377,7 +374,7 @@ export class BedrockConverseAdapter { contentBlockIndex: lastBlockIndex, stopReason: stopReason, metrics: { latencyMs }, - usage: { inputTokens, outputTokens, totalTokens }, //check this + usage: { inputTokens, outputTokens, totalTokens }, }; } From 1784ffef0d878a84ae0246d66a5164f49fa51744 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Thu, 26 Mar 2026 13:41:30 +0100 Subject: [PATCH 07/12] chore: clean up dev comments --- .../runtime/conversation_message_history_retriever.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts index b65f6f5cf38..c8dd4858f28 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts @@ -92,7 +92,7 @@ const messageItemSelectionSet = ` toolUseId } } - metrics { + metrics { latencyMs } usage { @@ -172,7 +172,7 @@ export class ConversationMessageHistoryRetriever { } const aiContext = current.aiContext; const content = aiContext - ? [...current.content, { text: JSON.stringify(aiContext) }]//TODO add usage and latency! but only for the response!! 
+ ? [...current.content, { text: JSON.stringify(aiContext) }] : current.content; acc.push({ role: current.role, content }); @@ -184,7 +184,7 @@ export class ConversationMessageHistoryRetriever { if (correspondingAssistantMessage) { acc.push({ role: correspondingAssistantMessage.role, - content: correspondingAssistantMessage.content, //TODO add usage and latency! but only for the response!! + content: correspondingAssistantMessage.content, }); } return acc; From 5d63c47cde0e8e7e3cc19f0f1d2dc5f82daa8b31 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Thu, 26 Mar 2026 13:43:38 +0100 Subject: [PATCH 08/12] chore: add metrics/usage tests --- .../runtime/bedrock_converse_adapter.test.ts | 44 +++++++++++++++++- .../conversation_turn_response_sender.test.ts | 45 +++++++++++++++++++ 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.test.ts b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.test.ts index 12988a5977d..80c563d2642 100644 --- a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.test.ts +++ b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.test.ts @@ -201,6 +201,8 @@ void describe('Bedrock converse adapter', () => { associatedUserMessageId: event.currentMessageId, contentBlockIndex: 1, stopReason: 'end_turn', + metrics: { latencyMs: 150 }, + usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }, }, ]); } else { @@ -712,6 +714,8 @@ void describe('Bedrock converse adapter', () => { associatedUserMessageId: event.currentMessageId, contentBlockIndex: 0, stopReason: 'tool_use', + metrics: { latencyMs: 150 }, + usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }, }, ]); } else { @@ -1033,14 +1037,44 @@ void describe('Bedrock converse adapter', () => { progressCalls[1].arguments[0], 'Processed 2000 chunks from Bedrock Converse Stream response, requestId=testRequestId', ); - // each block is 
decomposed into 4 chunks + start and stop of whole message. - const expectedNumberOfAllChunks = numberOfBlocks * 4 + 2; + // each block is decomposed into 4 chunks + start, stop, and metadata of whole message. + const expectedNumberOfAllChunks = numberOfBlocks * 4 + 3; assert.strictEqual( progressCalls[2].arguments[0], `Completed processing ${expectedNumberOfAllChunks.toString()} chunks from Bedrock Converse Stream response, requestId=testRequestId`, ); }); + void it('includes metrics and usage in the final streaming chunk', async () => { + const event: ConversationTurnEvent = { + ...commonEvent, + }; + + const bedrockClient = new BedrockRuntimeClient(); + const content = [{ text: 'block1' }]; + const bedrockResponse = mockBedrockResponse(content, true); + mock.method(bedrockClient, 'send', () => Promise.resolve(bedrockResponse)); + + const adapter = new BedrockConverseAdapter( + event, + [], + bedrockClient, + undefined, + messageHistoryRetriever, + ); + + const chunks: Array = + await askBedrockWithStreaming(adapter); + const lastChunk = chunks[chunks.length - 1]; + assert.ok(lastChunk.stopReason); + assert.deepStrictEqual(lastChunk.metrics, { latencyMs: 150 }); + assert.deepStrictEqual(lastChunk.usage, { + inputTokens: 10, + outputTokens: 20, + totalTokens: 30, + }); + }); + void it('throws if tool is duplicated', () => { assert.throws( () => @@ -1304,6 +1338,12 @@ const mockConverseStreamCommandOutput = ( stopReason: stopReason, }, }); + streamItems.push({ + metadata: { + metrics: { latencyMs: 150 }, + usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }, + }, + }); return { $metadata: {}, stream: (async function* (): AsyncGenerator { diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts index cedfbedf88a..c26eaac814c 100644 --- 
a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts @@ -300,6 +300,51 @@ void describe('Conversation turn response sender', () => { }); }); + void it('serializes metrics and usage to JSON when streaming', async () => { + const userAgentProvider = new UserAgentProvider( + {} as unknown as ConversationTurnEvent, + ); + mock.method(userAgentProvider, 'getUserAgent', () => ''); + const graphqlRequestExecutor = new GraphqlRequestExecutor( + '', + '', + userAgentProvider, + ); + const executeGraphqlMock = mock.method( + graphqlRequestExecutor, + 'executeGraphql', + () => Promise.resolve(), + ); + const sender = new ConversationTurnResponseSender( + event, + userAgentProvider, + graphqlRequestExecutor, + ); + const chunk: StreamingResponseChunk = { + accumulatedTurnContent: [{ text: 'testContent' }], + associatedUserMessageId: 'testAssociatedUserMessageId', + contentBlockIndex: 0, + conversationId: 'testConversationId', + stopReason: 'end_turn', + metrics: { latencyMs: 150 }, + usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }, + }; + await sender.sendResponseChunk(chunk); + + assert.strictEqual(executeGraphqlMock.mock.calls.length, 1); + const request = executeGraphqlMock.mock.calls[0] + .arguments[0] as GraphqlRequest; + // metrics and usage should be serialized to JSON strings + assert.strictEqual( + (request.variables.input as Record).metrics, + JSON.stringify({ latencyMs: 150 }), + ); + assert.strictEqual( + (request.variables.input as Record).usage, + JSON.stringify({ inputTokens: 10, outputTokens: 20, totalTokens: 30 }), + ); + }); + void it('sends errors response back to appsync', async () => { const userAgentProvider = new UserAgentProvider( {} as unknown as ConversationTurnEvent, From edb217ec69e8e282307f1e3c78640f3344f56c64 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Thu, 26 Mar 2026 13:46:15 +0100 
Subject: [PATCH 09/12] chore: run prettier on changed files --- .../runtime/conversation_turn_response_sender.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts index 782a56b44ed..3042b50c7e3 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts @@ -126,12 +126,12 @@ export class ConversationTurnResponseSender { }; private createStreamingMutationRequest = (chunk: StreamingResponseChunk) => { - const query = ` - mutation PublishModelResponse($input: ${this.event.responseMutation.inputTypeName}!) { - ${this.event.responseMutation.name}(input: $input) { - ${this.event.responseMutation.selectionSet} - } - } + const query = ` + mutation PublishModelResponse($input: ${this.event.responseMutation.inputTypeName}!) 
{ +      ${this.event.responseMutation.name}(input: $input) { +        ${this.event.responseMutation.selectionSet} +      } +    } `; const serializedChunk = { ...chunk, From e17ae289ec364706ad1bb35472170c3f5eea50c2 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Thu, 26 Mar 2026 13:53:55 +0100 Subject: [PATCH 10/12] chore: add changeset --- .changeset/spotty-dragons-hug.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/spotty-dragons-hug.md diff --git a/.changeset/spotty-dragons-hug.md b/.changeset/spotty-dragons-hug.md new file mode 100644 index 00000000000..22a8a53e314 --- /dev/null +++ b/.changeset/spotty-dragons-hug.md @@ -0,0 +1,5 @@ +--- +'@aws-amplify/ai-constructs': minor +--- + +Add metrics (latencyMs) and usage (inputTokens, outputTokens, totalTokens) to streaming conversation responses from Bedrock Converse API From cf2107deeaa16fbbae6af56ba2f729b1dfe631f1 Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Fri, 27 Mar 2026 11:58:35 +0100 Subject: [PATCH 11/12] fix: remove metrics/usage from message history selection set and add SerializedStreamingResponseChunk type --- .../runtime/bedrock_converse_adapter.ts | 3 +++ .../conversation_message_history_retriever.ts | 8 ------- .../conversation_turn_response_sender.ts | 22 ++++++++++++++----- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts index 694634de490..d4e8eac32d5 100644 --- a/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts +++ b/packages/ai-constructs/src/conversation/runtime/bedrock_converse_adapter.ts @@ -175,6 +175,9 @@ export class BedrockConverseAdapter { let blockIndex = 0; let lastBlockIndex = 0; let stopReason = ''; + // The following metadata are overwritten on each iteration of the tool-use loop. 
+ // Only the final iteration's values are reported, as intermediate iterations + // are tool-use round-trips and the final iteration contains the actual model response. let latencyMs = 0; let inputTokens = 0; let outputTokens = 0; diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts index c8dd4858f28..edb77f1c89f 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_message_history_retriever.ts @@ -92,14 +92,6 @@ const messageItemSelectionSet = ` toolUseId } } - metrics { - latencyMs - } - usage { - inputTokens - outputTokens - totalTokens - } `; /** diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts index 3042b50c7e3..eeed75ca847 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.ts @@ -15,8 +15,16 @@ export type MutationResponseInput = { }; }; +export type SerializedStreamingResponseChunk = Omit< + StreamingResponseChunk, + 'metrics' | 'usage' +> & { + metrics?: string; + usage?: string; +}; + export type MutationStreamingResponseInput = { - input: StreamingResponseChunk; + input: SerializedStreamingResponseChunk; }; export type MutationErrorsResponseInput = { @@ -133,17 +141,19 @@ export class ConversationTurnResponseSender { } } `; - const serializedChunk = { - ...chunk, + + const { metrics, usage, ...rest } = chunk; + const serializedChunk: SerializedStreamingResponseChunk = { + ...rest, accumulatedTurnContent: this.serializeContent( chunk.accumulatedTurnContent, ), - ...(chunk.metrics && { metrics: JSON.stringify(chunk.metrics) }), - 
...(chunk.usage && { usage: JSON.stringify(chunk.usage) }), + ...(metrics && { metrics: JSON.stringify(metrics) }), + ...(usage && { usage: JSON.stringify(usage) }), }; const variables = { input: serializedChunk, - } as MutationStreamingResponseInput; + }; return { query, variables }; }; From 5c1bcbfbb22d9e07348e3f2dd65849aa219f727a Mon Sep 17 00:00:00 2001 From: Paul Jakob Kroker Date: Fri, 27 Mar 2026 11:59:01 +0100 Subject: [PATCH 12/12] test: verify non-stop-reason chunks exclude metrics and usage --- .../conversation_turn_response_sender.test.ts | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts index c26eaac814c..5b62357c130 100644 --- a/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts +++ b/packages/ai-constructs/src/conversation/runtime/conversation_turn_response_sender.test.ts @@ -345,6 +345,44 @@ void describe('Conversation turn response sender', () => { ); }); + void it('does not include metrics and usage for non-stop-reason chunks', async () => { + const userAgentProvider = new UserAgentProvider( + {} as unknown as ConversationTurnEvent, + ); + mock.method(userAgentProvider, 'getUserAgent', () => ''); + const graphqlRequestExecutor = new GraphqlRequestExecutor( + '', + '', + userAgentProvider, + ); + const executeGraphqlMock = mock.method( + graphqlRequestExecutor, + 'executeGraphql', + () => Promise.resolve(), + ); + const sender = new ConversationTurnResponseSender( + event, + userAgentProvider, + graphqlRequestExecutor, + ); + const chunk: StreamingResponseChunk = { + accumulatedTurnContent: [{ text: 'testContent' }], + associatedUserMessageId: 'testAssociatedUserMessageId', + contentBlockIndex: 0, + contentBlockDeltaIndex: 0, + conversationId: 'testConversationId', + contentBlockText: 'testBlockText', + }; + 
await sender.sendResponseChunk(chunk); + + assert.strictEqual(executeGraphqlMock.mock.calls.length, 1); + const request = executeGraphqlMock.mock.calls[0] + .arguments[0] as GraphqlRequest; + const input = request.variables.input as Record; + assert.strictEqual(input.metrics, undefined); + assert.strictEqual(input.usage, undefined); + }); + void it('sends errors response back to appsync', async () => { const userAgentProvider = new UserAgentProvider( {} as unknown as ConversationTurnEvent,