Skip to content

Commit 3867c36

Browse files
authored
fix(instrumentations): include cache tokens in gen_ai.usage.input_tokens (#1028)
1 parent 9f2159e commit 3867c36

17 files changed

Lines changed: 1009 additions & 22 deletions

File tree

packages/instrumentation-anthropic/src/instrumentation.ts

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -516,18 +516,21 @@ export class AnthropicInstrumentation extends InstrumentationBase {
516516
}
517517

518518
if (type === GEN_AI_OPERATION_NAME_VALUE_CHAT && result.usage) {
519+
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
520+
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
521+
const cacheRead = result.usage.cache_read_input_tokens ?? 0;
522+
const cacheCreation = result.usage.cache_creation_input_tokens ?? 0;
523+
const totalInputTokens =
524+
result.usage.input_tokens + cacheRead + cacheCreation;
519525
span.setAttribute(
520526
SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS,
521-
result.usage.input_tokens + result.usage.output_tokens,
527+
totalInputTokens + result.usage.output_tokens,
522528
);
523529
span.setAttribute(
524530
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
525531
result.usage.output_tokens,
526532
);
527-
span.setAttribute(
528-
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
529-
result.usage.input_tokens,
530-
);
533+
span.setAttribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, totalInputTokens);
531534

532535
// Cache token attributes (v1.40)
533536
if (result.usage.cache_creation_input_tokens != null) {

packages/instrumentation-anthropic/test/instrumentation.test.ts

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,12 @@ import {
4242
ATTR_GEN_AI_RESPONSE_MODEL,
4343
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
4444
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
45+
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
46+
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
4547
ATTR_GEN_AI_PROVIDER_NAME,
4648
ATTR_GEN_AI_OPERATION_NAME,
4749
ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
50+
GEN_AI_OPERATION_NAME_VALUE_CHAT,
4851
} from "@opentelemetry/semantic-conventions/incubating";
4952

5053
const memoryExporter = new InMemorySpanExporter();
@@ -343,3 +346,108 @@ describe("Test Anthropic instrumentation", async function () {
343346
assert.equal(+promptTokens + +completionTokens, totalTokens);
344347
}).timeout(30000);
345348
});
349+
350+
describe("Anthropic cache token fold-in semantics", () => {
351+
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
352+
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
353+
// These tests exercise _endSpan directly with synthetic Message objects.
354+
355+
const exporter = new InMemorySpanExporter();
356+
const provider = new NodeTracerProvider({
357+
spanProcessors: [new SimpleSpanProcessor(exporter)],
358+
});
359+
const instrumentation = new AnthropicInstrumentation();
360+
instrumentation.setTracerProvider(provider);
361+
362+
afterEach(() => exporter.reset());
363+
364+
const endSpanWithUsage = (usage: Record<string, unknown>) => {
365+
const span = (instrumentation as any).tracer.startSpan("chat test-model");
366+
(instrumentation as any)._endSpan({
367+
span,
368+
type: GEN_AI_OPERATION_NAME_VALUE_CHAT,
369+
result: {
370+
id: "msg_test",
371+
type: "message",
372+
model: "test-model",
373+
role: "assistant",
374+
stop_reason: "end_turn",
375+
stop_sequence: null,
376+
content: [],
377+
usage,
378+
},
379+
});
380+
const spans = exporter.getFinishedSpans();
381+
return spans[spans.length - 1];
382+
};
383+
384+
it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
385+
const span = endSpanWithUsage({
386+
input_tokens: 100,
387+
output_tokens: 50,
388+
cache_read_input_tokens: 900,
389+
cache_creation_input_tokens: 200,
390+
});
391+
assert.strictEqual(
392+
span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
393+
1200,
394+
"input_tokens should equal 100 + 900 + 200",
395+
);
396+
assert.strictEqual(
397+
span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
398+
1250,
399+
"total_tokens should equal summed input (1200) + output (50)",
400+
);
401+
assert.strictEqual(
402+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
403+
900,
404+
"cache_read should still be emitted separately",
405+
);
406+
assert.strictEqual(
407+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
408+
200,
409+
"cache_creation should still be emitted separately",
410+
);
411+
});
412+
413+
it("folds only cache_read when cache_creation is absent", () => {
414+
const span = endSpanWithUsage({
415+
input_tokens: 100,
416+
output_tokens: 50,
417+
cache_read_input_tokens: 900,
418+
});
419+
assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
420+
assert.strictEqual(
421+
span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
422+
1050,
423+
);
424+
assert.strictEqual(
425+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
426+
900,
427+
);
428+
assert.strictEqual(
429+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
430+
undefined,
431+
);
432+
});
433+
434+
it("leaves input_tokens unchanged when no cache fields present", () => {
435+
const span = endSpanWithUsage({
436+
input_tokens: 100,
437+
output_tokens: 50,
438+
});
439+
assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
440+
assert.strictEqual(
441+
span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
442+
150,
443+
);
444+
assert.strictEqual(
445+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
446+
undefined,
447+
);
448+
assert.strictEqual(
449+
span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
450+
undefined,
451+
);
452+
});
453+
});

packages/instrumentation-bedrock/src/instrumentation.ts

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,8 @@ import {
4848
ATTR_GEN_AI_RESPONSE_MODEL,
4949
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
5050
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
51+
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
52+
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
5153
GEN_AI_OPERATION_NAME_VALUE_CHAT,
5254
GEN_AI_OPERATION_NAME_VALUE_TEXT_COMPLETION,
5355
ATTR_GEN_AI_PROVIDER_NAME,
@@ -694,14 +696,36 @@ export class BedrockInstrumentation extends InstrumentationBase {
694696
],
695697
}
696698
: {}),
697-
// Anthropic new messages API returns usage on non-streaming response
699+
// Anthropic new messages API returns usage on non-streaming response.
700+
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
701+
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
698702
...(usage
699-
? {
700-
[ATTR_GEN_AI_USAGE_INPUT_TOKENS]: usage["input_tokens"],
701-
[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
702-
[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
703-
(usage["input_tokens"] || 0) + (usage["output_tokens"] || 0),
704-
}
703+
? (() => {
704+
const inputTokens = usage["input_tokens"] || 0;
705+
const outputTokens = usage["output_tokens"] || 0;
706+
const cacheRead = usage["cache_read_input_tokens"] || 0;
707+
const cacheCreation = usage["cache_creation_input_tokens"] || 0;
708+
const totalInputTokens =
709+
inputTokens + cacheRead + cacheCreation;
710+
return {
711+
[ATTR_GEN_AI_USAGE_INPUT_TOKENS]: totalInputTokens,
712+
[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
713+
[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
714+
totalInputTokens + outputTokens,
715+
...(usage["cache_read_input_tokens"] != null
716+
? {
717+
[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS]:
718+
usage["cache_read_input_tokens"],
719+
}
720+
: {}),
721+
...(usage["cache_creation_input_tokens"] != null
722+
? {
723+
[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS]:
724+
usage["cache_creation_input_tokens"],
725+
}
726+
: {}),
727+
};
728+
})()
705729
: {}),
706730
};
707731

packages/instrumentation-bedrock/tests/anthropic.test.ts

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ import {
3434
ATTR_GEN_AI_REQUEST_TOP_P,
3535
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
3636
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
37+
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
38+
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
3739
// OTel 1.40 new attributes
3840
ATTR_GEN_AI_PROVIDER_NAME,
3941
GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
@@ -126,6 +128,69 @@ describe("Test Anthropic with AWS Bedrock Instrumentation", () => {
126128
context.disable();
127129
});
128130

131+
it("should set cache tokens in span for Anthropic messages API with cached tokens", async function () {
132+
const { server } = this.polly as Polly;
133+
const modelId = "anthropic.claude-3-5-sonnet-20241022-v2-0";
134+
server
135+
.post(
136+
`https://bedrock-runtime.us-east-1.amazonaws.com/model/${modelId}/invoke`,
137+
)
138+
.intercept((_req, res) => {
139+
res.status(200).json({
140+
id: "msg_cache_test",
141+
type: "message",
142+
role: "assistant",
143+
content: [{ type: "text", text: "North, South, East, West." }],
144+
model: modelId,
145+
stop_reason: "end_turn",
146+
stop_sequence: null,
147+
usage: {
148+
input_tokens: 10,
149+
cache_creation_input_tokens: 8,
150+
cache_read_input_tokens: 5,
151+
output_tokens: 7,
152+
},
153+
});
154+
});
155+
156+
const input = {
157+
modelId,
158+
contentType: "application/json",
159+
accept: "application/json",
160+
body: JSON.stringify({
161+
anthropic_version: "bedrock-2023-05-31",
162+
max_tokens: 300,
163+
messages: [
164+
{ role: "user", content: "What are the 4 cardinal directions?" },
165+
],
166+
}),
167+
};
168+
169+
const command = new bedrock.InvokeModelCommand(input);
170+
await bedrockRuntimeClient.send(command);
171+
172+
const spans = memoryExporter.getFinishedSpans();
173+
const attributes = spans[0].attributes;
174+
175+
// Per OTel GenAI semconv (subset semantics), input_tokens includes
176+
// cache_read + cache_creation. Raw response: input=10, cache_read=5,
177+
// cache_creation=8 → summed input_tokens = 23.
178+
assert.strictEqual(attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 23);
179+
assert.strictEqual(attributes[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 7);
180+
assert.strictEqual(
181+
attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
182+
30,
183+
);
184+
assert.strictEqual(
185+
attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
186+
5,
187+
);
188+
assert.strictEqual(
189+
attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
190+
8,
191+
);
192+
});
193+
129194
it("should set request and response attributes in span for given prompt", async () => {
130195
const prompt = `What are the 4 cardinal directions?`;
131196
const params = {
Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright Traceloop
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import * as assert from "assert";
18+
import { BedrockInstrumentation } from "../src/instrumentation";
19+
import { BedrockVendor } from "../src/types";
20+
import { SpanAttributes } from "@traceloop/ai-semantic-conventions";
21+
import {
22+
ATTR_GEN_AI_USAGE_INPUT_TOKENS,
23+
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
24+
ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
25+
ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
26+
} from "@opentelemetry/semantic-conventions/incubating";
27+
28+
// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
29+
// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
30+
// These tests exercise the Anthropic-on-Bedrock response handler directly.
31+
32+
describe("Bedrock Anthropic cache token fold-in semantics", () => {
33+
const instrumentation = new BedrockInstrumentation();
34+
35+
const setResponseAttrs = (usage: Record<string, unknown>) =>
36+
(instrumentation as any)._setResponseAttributes(
37+
BedrockVendor.ANTHROPIC,
38+
{
39+
stop_reason: "end_turn",
40+
usage,
41+
content: [],
42+
},
43+
false,
44+
);
45+
46+
it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
47+
const attrs = setResponseAttrs({
48+
input_tokens: 100,
49+
output_tokens: 50,
50+
cache_read_input_tokens: 900,
51+
cache_creation_input_tokens: 200,
52+
});
53+
assert.strictEqual(
54+
attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
55+
1200,
56+
"input_tokens should equal 100 + 900 + 200",
57+
);
58+
assert.strictEqual(
59+
attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
60+
1250,
61+
"total_tokens should equal summed input (1200) + output (50)",
62+
);
63+
assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
64+
assert.strictEqual(
65+
attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
66+
200,
67+
);
68+
});
69+
70+
it("folds only cache_read when cache_creation is absent", () => {
71+
const attrs = setResponseAttrs({
72+
input_tokens: 100,
73+
output_tokens: 50,
74+
cache_read_input_tokens: 900,
75+
});
76+
assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
77+
assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 1050);
78+
assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
79+
assert.strictEqual(
80+
attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
81+
undefined,
82+
);
83+
});
84+
85+
it("leaves input_tokens unchanged when no cache fields present", () => {
86+
const attrs = setResponseAttrs({
87+
input_tokens: 100,
88+
output_tokens: 50,
89+
});
90+
assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
91+
assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 50);
92+
assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 150);
93+
assert.strictEqual(
94+
attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
95+
undefined,
96+
);
97+
});
98+
});

0 commit comments

Comments
 (0)