Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions packages/instrumentation-anthropic/src/instrumentation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -516,18 +516,21 @@ export class AnthropicInstrumentation extends InstrumentationBase {
}

if (type === GEN_AI_OPERATION_NAME_VALUE_CHAT && result.usage) {
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
const cacheRead = result.usage.cache_read_input_tokens ?? 0;
const cacheCreation = result.usage.cache_creation_input_tokens ?? 0;
const totalInputTokens =
result.usage.input_tokens + cacheRead + cacheCreation;
span.setAttribute(
SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS,
result.usage.input_tokens + result.usage.output_tokens,
totalInputTokens + result.usage.output_tokens,
);
span.setAttribute(
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
result.usage.output_tokens,
);
span.setAttribute(
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
result.usage.input_tokens,
);
span.setAttribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, totalInputTokens);

// Cache token attributes (v1.40)
if (result.usage.cache_creation_input_tokens != null) {
Expand Down
108 changes: 108 additions & 0 deletions packages/instrumentation-anthropic/test/instrumentation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@ import {
ATTR_GEN_AI_RESPONSE_MODEL,
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
ATTR_GEN_AI_PROVIDER_NAME,
ATTR_GEN_AI_OPERATION_NAME,
ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
GEN_AI_OPERATION_NAME_VALUE_CHAT,
} from "@opentelemetry/semantic-conventions/incubating";

const memoryExporter = new InMemorySpanExporter();
Expand Down Expand Up @@ -343,3 +346,108 @@ describe("Test Anthropic instrumentation", async function () {
assert.equal(+promptTokens + +completionTokens, totalTokens);
}).timeout(30000);
});

describe("Anthropic cache token fold-in semantics", () => {
  // Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
  // SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
  // These tests exercise _endSpan directly with synthetic Message objects.

  // This suite uses its own exporter and tracer provider rather than the
  // module-level memoryExporter.
  const exporter = new InMemorySpanExporter();
  const provider = new NodeTracerProvider({
    spanProcessors: [new SimpleSpanProcessor(exporter)],
  });
  const instrumentation = new AnthropicInstrumentation();
  instrumentation.setTracerProvider(provider);

  // Drop exported spans after every test so each case only sees its own span.
  afterEach(() => exporter.reset());

  /**
   * Starts a span on the instrumentation's tracer, hands it to `_endSpan`
   * together with a synthetic chat response carrying `usage`, and returns the
   * most recently exported span.
   *
   * @param usage - Value placed in the `usage` field of the synthetic response.
   * @returns The last span reported by the in-memory exporter.
   */
  const endSpanWithUsage = (usage: Record<string, unknown>) => {
    // `tracer` and `_endSpan` are reached through `any` casts because the test
    // pokes at instrumentation internals rather than going through the SDK.
    const span = (instrumentation as any).tracer.startSpan("chat test-model");
    (instrumentation as any)._endSpan({
      span,
      type: GEN_AI_OPERATION_NAME_VALUE_CHAT,
      result: {
        id: "msg_test",
        type: "message",
        model: "test-model",
        role: "assistant",
        stop_reason: "end_turn",
        stop_sequence: null,
        content: [],
        usage,
      },
    });
    const spans = exporter.getFinishedSpans();
    // Fail with a readable message instead of a TypeError on
    // `undefined.attributes` if no span reached the exporter.
    assert.ok(
      spans.length > 0,
      "_endSpan should have ended and exported the span",
    );
    return spans[spans.length - 1];
  };

  it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
    const span = endSpanWithUsage({
      input_tokens: 100,
      output_tokens: 50,
      cache_read_input_tokens: 900,
      cache_creation_input_tokens: 200,
    });
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
      1200,
      "input_tokens should equal 100 + 900 + 200",
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS],
      50,
      "output_tokens should not be affected by cache folding",
    );
    assert.strictEqual(
      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
      1250,
      "total_tokens should equal summed input (1200) + output (50)",
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
      900,
      "cache_read should still be emitted separately",
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      200,
      "cache_creation should still be emitted separately",
    );
  });

  it("folds only cache_read when cache_creation is absent", () => {
    const span = endSpanWithUsage({
      input_tokens: 100,
      output_tokens: 50,
      cache_read_input_tokens: 900,
    });
    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
    assert.strictEqual(
      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
      1050,
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
      900,
    );
    // An absent cache field must not produce an attribute at all.
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      undefined,
    );
  });

  it("leaves input_tokens unchanged when no cache fields present", () => {
    const span = endSpanWithUsage({
      input_tokens: 100,
      output_tokens: 50,
    });
    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
    assert.strictEqual(
      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
      150,
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
      undefined,
    );
    assert.strictEqual(
      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      undefined,
    );
  });
});
38 changes: 31 additions & 7 deletions packages/instrumentation-bedrock/src/instrumentation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ import {
ATTR_GEN_AI_RESPONSE_MODEL,
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
GEN_AI_OPERATION_NAME_VALUE_CHAT,
GEN_AI_OPERATION_NAME_VALUE_TEXT_COMPLETION,
ATTR_GEN_AI_PROVIDER_NAME,
Expand Down Expand Up @@ -694,14 +696,36 @@ export class BedrockInstrumentation extends InstrumentationBase {
],
}
: {}),
// Anthropic new messages API returns usage on non-streaming response
// Anthropic new messages API returns usage on non-streaming response.
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
...(usage
? {
[ATTR_GEN_AI_USAGE_INPUT_TOKENS]: usage["input_tokens"],
[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
(usage["input_tokens"] || 0) + (usage["output_tokens"] || 0),
}
? (() => {
const inputTokens = usage["input_tokens"] || 0;
const outputTokens = usage["output_tokens"] || 0;
const cacheRead = usage["cache_read_input_tokens"] || 0;
const cacheCreation = usage["cache_creation_input_tokens"] || 0;
const totalInputTokens =
inputTokens + cacheRead + cacheCreation;
return {
[ATTR_GEN_AI_USAGE_INPUT_TOKENS]: totalInputTokens,
[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
totalInputTokens + outputTokens,
...(usage["cache_read_input_tokens"]
? {
[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS]:
usage["cache_read_input_tokens"],
}
: {}),
...(usage["cache_creation_input_tokens"]
? {
[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS]:
usage["cache_creation_input_tokens"],
}
: {}),
};
})()
: {}),
};

Expand Down
65 changes: 65 additions & 0 deletions packages/instrumentation-bedrock/tests/anthropic.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import {
ATTR_GEN_AI_REQUEST_TOP_P,
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
// OTel 1.40 new attributes
ATTR_GEN_AI_PROVIDER_NAME,
GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
Expand Down Expand Up @@ -126,6 +128,69 @@ describe("Test Anthropic with AWS Bedrock Instrumentation", () => {
context.disable();
});

it("should set cache tokens in span for Anthropic messages API with cached tokens", async function () {
  // A regular function (not an arrow) is used so `this.polly` is reachable.
  const { server } = this.polly as Polly;
  const modelId = "anthropic.claude-3-5-sonnet-20241022-v2-0";
  const invokeUrl = `https://bedrock-runtime.us-east-1.amazonaws.com/model/${modelId}/invoke`;

  // Canned Anthropic messages response whose usage block carries both cache
  // counters next to the plain input/output counts.
  const cannedResponse = {
    id: "msg_cache_test",
    type: "message",
    role: "assistant",
    content: [{ type: "text", text: "North, South, East, West." }],
    model: modelId,
    stop_reason: "end_turn",
    stop_sequence: null,
    usage: {
      input_tokens: 10,
      cache_creation_input_tokens: 8,
      cache_read_input_tokens: 5,
      output_tokens: 7,
    },
  };
  server.post(invokeUrl).intercept((_req, res) => {
    res.status(200).json(cannedResponse);
  });

  const requestBody = JSON.stringify({
    anthropic_version: "bedrock-2023-05-31",
    max_tokens: 300,
    messages: [
      { role: "user", content: "What are the 4 cardinal directions?" },
    ],
  });
  const command = new bedrock.InvokeModelCommand({
    modelId,
    contentType: "application/json",
    accept: "application/json",
    body: requestBody,
  });
  await bedrockRuntimeClient.send(command);

  const [firstSpan] = memoryExporter.getFinishedSpans();
  const attributes = firstSpan.attributes;

  // Per OTel GenAI semconv (subset semantics), input_tokens includes
  // cache_read + cache_creation. Raw response: input=10, cache_read=5,
  // cache_creation=8 → summed input_tokens = 23; total = 23 + 7 output = 30.
  const expectations: Array<[string, number]> = [
    [ATTR_GEN_AI_USAGE_INPUT_TOKENS, 23],
    [ATTR_GEN_AI_USAGE_OUTPUT_TOKENS, 7],
    [SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, 30],
    [ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, 5],
    [ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 8],
  ];
  for (const [attributeKey, expectedValue] of expectations) {
    assert.strictEqual(attributes[attributeKey], expectedValue);
  }
});

it("should set request and response attributes in span for given prompt", async () => {
const prompt = `What are the 4 cardinal directions?`;
const params = {
Expand Down
98 changes: 98 additions & 0 deletions packages/instrumentation-bedrock/tests/cache-token-fold-in.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright Traceloop
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import * as assert from "assert";
import { BedrockInstrumentation } from "../src/instrumentation";
import { BedrockVendor } from "../src/types";
import { SpanAttributes } from "@traceloop/ai-semantic-conventions";
import {
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
} from "@opentelemetry/semantic-conventions/incubating";

// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
// These tests exercise the Anthropic-on-Bedrock response handler directly.

describe("Bedrock Anthropic cache token fold-in semantics", () => {
  const instrumentation = new BedrockInstrumentation();

  /**
   * Calls the instrumentation's `_setResponseAttributes` handler for the
   * Anthropic vendor with a synthetic response body and returns its result,
   * which the tests index by attribute key.
   *
   * @param usage - Value placed in the `usage` field of the synthetic response.
   * @returns Whatever `_setResponseAttributes` returns for that response.
   */
  const setResponseAttrs = (usage: Record<string, unknown>) =>
    // The `any` cast is needed because the test calls an internal handler.
    (instrumentation as any)._setResponseAttributes(
      BedrockVendor.ANTHROPIC,
      {
        stop_reason: "end_turn",
        usage,
        content: [],
      },
      // NOTE(review): presumably the "is streaming" flag — confirm against
      // the handler's signature.
      false,
    );

  it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
    const attrs = setResponseAttrs({
      input_tokens: 100,
      output_tokens: 50,
      cache_read_input_tokens: 900,
      cache_creation_input_tokens: 200,
    });
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
      1200,
      "input_tokens should equal 100 + 900 + 200",
    );
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS],
      50,
      "output_tokens should not be affected by cache folding",
    );
    assert.strictEqual(
      attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
      1250,
      "total_tokens should equal summed input (1200) + output (50)",
    );
    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      200,
    );
  });

  it("folds only cache_read when cache_creation is absent", () => {
    const attrs = setResponseAttrs({
      input_tokens: 100,
      output_tokens: 50,
      cache_read_input_tokens: 900,
    });
    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
    assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 1050);
    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
    // An absent cache field must not produce an attribute at all.
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      undefined,
    );
  });

  it("leaves input_tokens unchanged when no cache fields present", () => {
    const attrs = setResponseAttrs({
      input_tokens: 100,
      output_tokens: 50,
    });
    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 50);
    assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 150);
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
      undefined,
    );
    // Neither cache attribute may appear when the response reports no cache usage.
    assert.strictEqual(
      attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
      undefined,
    );
  });
});
Loading
Loading