diff --git a/packages/instrumentation-anthropic/src/instrumentation.ts b/packages/instrumentation-anthropic/src/instrumentation.ts
index a79691fc..7012cae0 100644
--- a/packages/instrumentation-anthropic/src/instrumentation.ts
+++ b/packages/instrumentation-anthropic/src/instrumentation.ts
@@ -516,18 +516,21 @@ export class AnthropicInstrumentation extends InstrumentationBase {
       }
 
       if (type === GEN_AI_OPERATION_NAME_VALUE_CHAT && result.usage) {
+        // Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
+        // SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
+        const cacheRead = result.usage.cache_read_input_tokens ?? 0;
+        const cacheCreation = result.usage.cache_creation_input_tokens ?? 0;
+        const totalInputTokens =
+          result.usage.input_tokens + cacheRead + cacheCreation;
         span.setAttribute(
           SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS,
-          result.usage.input_tokens + result.usage.output_tokens,
+          totalInputTokens + result.usage.output_tokens,
         );
         span.setAttribute(
           ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
           result.usage.output_tokens,
         );
-        span.setAttribute(
-          ATTR_GEN_AI_USAGE_INPUT_TOKENS,
-          result.usage.input_tokens,
-        );
+        span.setAttribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, totalInputTokens);
 
         // Cache token attributes (v1.40)
         if (result.usage.cache_creation_input_tokens != null) {
diff --git a/packages/instrumentation-anthropic/test/instrumentation.test.ts b/packages/instrumentation-anthropic/test/instrumentation.test.ts
index 3677b2db..c2d7934c 100644
--- a/packages/instrumentation-anthropic/test/instrumentation.test.ts
+++ b/packages/instrumentation-anthropic/test/instrumentation.test.ts
@@ -42,9 +42,12 @@ import {
   ATTR_GEN_AI_RESPONSE_MODEL,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
   ATTR_GEN_AI_PROVIDER_NAME,
   ATTR_GEN_AI_OPERATION_NAME,
   ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
+  GEN_AI_OPERATION_NAME_VALUE_CHAT,
 } from "@opentelemetry/semantic-conventions/incubating";
 
 const memoryExporter = new InMemorySpanExporter();
@@ -343,3 +346,108 @@ describe("Test Anthropic instrumentation", async function () {
     assert.equal(+promptTokens + +completionTokens, totalTokens);
   }).timeout(30000);
 });
+
+describe("Anthropic cache token fold-in semantics", () => {
+  // Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
+  // SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
+  // These tests exercise _endSpan directly with synthetic Message objects.
+
+  const exporter = new InMemorySpanExporter();
+  const provider = new NodeTracerProvider({
+    spanProcessors: [new SimpleSpanProcessor(exporter)],
+  });
+  const instrumentation = new AnthropicInstrumentation();
+  instrumentation.setTracerProvider(provider);
+
+  afterEach(() => exporter.reset());
+
+  const endSpanWithUsage = (usage: Record<string, unknown>) => {
+    const span = (instrumentation as any).tracer.startSpan("chat test-model");
+    (instrumentation as any)._endSpan({
+      span,
+      type: GEN_AI_OPERATION_NAME_VALUE_CHAT,
+      result: {
+        id: "msg_test",
+        type: "message",
+        model: "test-model",
+        role: "assistant",
+        stop_reason: "end_turn",
+        stop_sequence: null,
+        content: [],
+        usage,
+      },
+    });
+    const spans = exporter.getFinishedSpans();
+    return spans[spans.length - 1];
+  };
+
+  it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
+    const span = endSpanWithUsage({
+      input_tokens: 100,
+      output_tokens: 50,
+      cache_read_input_tokens: 900,
+      cache_creation_input_tokens: 200,
+    });
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
+      1200,
+      "input_tokens should equal 100 + 900 + 200",
+    );
+    assert.strictEqual(
+      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      1250,
+      "total_tokens should equal summed input (1200) + output (50)",
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      900,
+      "cache_read should still be emitted separately",
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      200,
+      "cache_creation should still be emitted separately",
+    );
+  });
+
+  it("folds only cache_read when cache_creation is absent", () => {
+    const span = endSpanWithUsage({
+      input_tokens: 100,
+      output_tokens: 50,
+      cache_read_input_tokens: 900,
+    });
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
+    assert.strictEqual(
+      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      1050,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      900,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      undefined,
+    );
+  });
+
+  it("leaves input_tokens unchanged when no cache fields present", () => {
+    const span = endSpanWithUsage({
+      input_tokens: 100,
+      output_tokens: 50,
+    });
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
+    assert.strictEqual(
+      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      150,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      undefined,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      undefined,
+    );
+  });
+});
diff --git a/packages/instrumentation-bedrock/src/instrumentation.ts b/packages/instrumentation-bedrock/src/instrumentation.ts
index fee5537c..dcea3fa6 100644
--- a/packages/instrumentation-bedrock/src/instrumentation.ts
+++ b/packages/instrumentation-bedrock/src/instrumentation.ts
@@ -48,6 +48,8 @@ import {
   ATTR_GEN_AI_RESPONSE_MODEL,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
   GEN_AI_OPERATION_NAME_VALUE_CHAT,
   GEN_AI_OPERATION_NAME_VALUE_TEXT_COMPLETION,
   ATTR_GEN_AI_PROVIDER_NAME,
@@ -694,14 +696,36 @@ export class BedrockInstrumentation extends InstrumentationBase {
                 ],
               }
             : {}),
-          // Anthropic new messages API returns usage on non-streaming response
+          // Anthropic new messages API returns usage on non-streaming response.
+          // Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
+          // SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
           ...(usage
-            ? {
-                [ATTR_GEN_AI_USAGE_INPUT_TOKENS]: usage["input_tokens"],
-                [ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
-                [SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
-                  (usage["input_tokens"] || 0) + (usage["output_tokens"] || 0),
-              }
+            ? (() => {
+                // Cache counts are folded into the input total. Their own
+                // attributes are emitted whenever the field is present (0
+                // included), as instrumentation-anthropic's `!= null` does.
+                const cacheRead = usage["cache_read_input_tokens"];
+                const cacheCreation = usage["cache_creation_input_tokens"];
+                const totalInputTokens =
+                  (usage["input_tokens"] || 0) +
+                  (cacheRead || 0) +
+                  (cacheCreation || 0);
+                return {
+                  [ATTR_GEN_AI_USAGE_INPUT_TOKENS]: totalInputTokens,
+                  [ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: usage["output_tokens"],
+                  [SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS]:
+                    totalInputTokens + (usage["output_tokens"] || 0),
+                  ...(cacheRead != null
+                    ? { [ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS]: cacheRead }
+                    : {}),
+                  ...(cacheCreation != null
+                    ? {
+                        [ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS]:
+                          cacheCreation,
+                      }
+                    : {}),
+                };
+              })()
             : {}),
         };
 
diff --git a/packages/instrumentation-bedrock/tests/anthropic.test.ts b/packages/instrumentation-bedrock/tests/anthropic.test.ts
index ddb36f1d..744a7b35 100644
--- a/packages/instrumentation-bedrock/tests/anthropic.test.ts
+++ b/packages/instrumentation-bedrock/tests/anthropic.test.ts
@@ -34,6 +34,8 @@ import {
   ATTR_GEN_AI_REQUEST_TOP_P,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
   // OTel 1.40 new attributes
   ATTR_GEN_AI_PROVIDER_NAME,
   GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
@@ -126,6 +128,69 @@ describe("Test Anthropic with AWS Bedrock Instrumentation", () => {
     context.disable();
   });
 
+  it("should set cache tokens in span for Anthropic messages API with cached tokens", async function () {
+    const { server } = this.polly as Polly;
+    const modelId = "anthropic.claude-3-5-sonnet-20241022-v2-0";
+    server
+      .post(
+        `https://bedrock-runtime.us-east-1.amazonaws.com/model/${modelId}/invoke`,
+      )
+      .intercept((_req, res) => {
+        res.status(200).json({
+          id: "msg_cache_test",
+          type: "message",
+          role: "assistant",
+          content: [{ type: "text", text: "North, South, East, West." }],
+          model: modelId,
+          stop_reason: "end_turn",
+          stop_sequence: null,
+          usage: {
+            input_tokens: 10,
+            cache_creation_input_tokens: 8,
+            cache_read_input_tokens: 5,
+            output_tokens: 7,
+          },
+        });
+      });
+
+    const input = {
+      modelId,
+      contentType: "application/json",
+      accept: "application/json",
+      body: JSON.stringify({
+        anthropic_version: "bedrock-2023-05-31",
+        max_tokens: 300,
+        messages: [
+          { role: "user", content: "What are the 4 cardinal directions?" },
+        ],
+      }),
+    };
+
+    const command = new bedrock.InvokeModelCommand(input);
+    await bedrockRuntimeClient.send(command);
+
+    const spans = memoryExporter.getFinishedSpans();
+    const attributes = spans[0].attributes;
+
+    // Per OTel GenAI semconv (subset semantics), input_tokens includes
+    // cache_read + cache_creation. Raw response: input=10, cache_read=5,
+    // cache_creation=8 → summed input_tokens = 23.
+    assert.strictEqual(attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 23);
+    assert.strictEqual(attributes[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 7);
+    assert.strictEqual(
+      attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      30,
+    );
+    assert.strictEqual(
+      attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      5,
+    );
+    assert.strictEqual(
+      attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      8,
+    );
+  });
+
   it("should set request and response attributes in span for given prompt", async () => {
     const prompt = `What are the 4 cardinal directions?`;
     const params = {
diff --git a/packages/instrumentation-bedrock/tests/cache-token-fold-in.test.ts b/packages/instrumentation-bedrock/tests/cache-token-fold-in.test.ts
new file mode 100644
index 00000000..1c4e16f3
--- /dev/null
+++ b/packages/instrumentation-bedrock/tests/cache-token-fold-in.test.ts
@@ -0,0 +1,98 @@
+/*
+ * Copyright Traceloop
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as assert from "assert";
+import { BedrockInstrumentation } from "../src/instrumentation";
+import { BedrockVendor } from "../src/types";
+import { SpanAttributes } from "@traceloop/ai-semantic-conventions";
+import {
+  ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+} from "@opentelemetry/semantic-conventions/incubating";
+
+// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
+// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
+// These tests exercise the Anthropic-on-Bedrock response handler directly.
+
+describe("Bedrock Anthropic cache token fold-in semantics", () => {
+  const instrumentation = new BedrockInstrumentation();
+
+  const setResponseAttrs = (usage: Record<string, unknown>) =>
+    (instrumentation as any)._setResponseAttributes(
+      BedrockVendor.ANTHROPIC,
+      {
+        stop_reason: "end_turn",
+        usage,
+        content: [],
+      },
+      false,
+    );
+
+  it("folds cache_read + cache_creation into input_tokens and total_tokens", () => {
+    const attrs = setResponseAttrs({
+      input_tokens: 100,
+      output_tokens: 50,
+      cache_read_input_tokens: 900,
+      cache_creation_input_tokens: 200,
+    });
+    assert.strictEqual(
+      attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
+      1200,
+      "input_tokens should equal 100 + 900 + 200",
+    );
+    assert.strictEqual(
+      attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      1250,
+      "total_tokens should equal summed input (1200) + output (50)",
+    );
+    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
+    assert.strictEqual(
+      attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      200,
+    );
+  });
+
+  it("folds only cache_read when cache_creation is absent", () => {
+    const attrs = setResponseAttrs({
+      input_tokens: 100,
+      output_tokens: 50,
+      cache_read_input_tokens: 900,
+    });
+    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
+    assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 1050);
+    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 900);
+    assert.strictEqual(
+      attrs[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      undefined,
+    );
+  });
+
+  it("leaves input_tokens unchanged when no cache fields present", () => {
+    const attrs = setResponseAttrs({ input_tokens: 100, output_tokens: 50 });
+    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
+    assert.strictEqual(attrs[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 50);
+    assert.strictEqual(attrs[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS], 150);
+    // Neither cache attribute may appear when the response has no cache fields.
+    for (const cacheAttr of [
+      ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+      ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+    ]) {
+      assert.strictEqual(attrs[cacheAttr], undefined);
+    }
+  });
+});
diff --git a/packages/instrumentation-langchain/src/callback_handler.ts b/packages/instrumentation-langchain/src/callback_handler.ts
index 39db59fd..d77baab4 100644
--- a/packages/instrumentation-langchain/src/callback_handler.ts
+++ b/packages/instrumentation-langchain/src/callback_handler.ts
@@ -30,6 +30,8 @@ import {
   ATTR_GEN_AI_RESPONSE_MODEL,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
   ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
   ATTR_GEN_AI_RESPONSE_ID,
   ATTR_GEN_AI_AGENT_NAME,
@@ -284,8 +286,52 @@ export class TraceloopCallbackHandler extends BaseCallbackHandler {
       );
     }
 
-    // Add usage metrics if available
-    if (output.llmOutput?.usage) {
+    // Prefer the per-message `usage_metadata` shape — this is langchain-core's
+    // canonical UsageMetadata type, carrying cache token breakdowns in
+    // `input_token_details`. Per the contract, `input_tokens` is already the
+    // sum of all input token types (subset semantics), matching OTel GenAI.
+    const usageMetadata = this.extractUsageMetadataFromGenerations(output);
+    if (usageMetadata) {
+      if (typeof usageMetadata.input_tokens === "number") {
+        span.setAttribute(
+          ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+          usageMetadata.input_tokens,
+        );
+      }
+      if (typeof usageMetadata.output_tokens === "number") {
+        span.setAttribute(
+          ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+          usageMetadata.output_tokens,
+        );
+      }
+      if (typeof usageMetadata.total_tokens === "number") {
+        span.setAttribute(
+          SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS,
+          usageMetadata.total_tokens,
+        );
+      } else if (
+        typeof usageMetadata.input_tokens === "number" &&
+        typeof usageMetadata.output_tokens === "number"
+      ) {
+        span.setAttribute(
+          SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS,
+          usageMetadata.input_tokens + usageMetadata.output_tokens,
+        );
+      }
+      const details = usageMetadata.input_token_details;
+      if (details && typeof details.cache_read === "number") {
+        span.setAttribute(
+          ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+          details.cache_read,
+        );
+      }
+      if (details && typeof details.cache_creation === "number") {
+        span.setAttribute(
+          ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+          details.cache_creation,
+        );
+      }
+    } else if (output.llmOutput?.usage) {
       const usage = output.llmOutput.usage;
       if (usage.input_tokens || usage.input_tokens === 0) {
         span.setAttribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens);
@@ -305,8 +351,9 @@ export class TraceloopCallbackHandler extends BaseCallbackHandler {
       }
     }
 
-    // Also check for tokenUsage format (for compatibility)
-    if (output.llmOutput?.tokenUsage) {
+    // Also check for tokenUsage format (for compatibility).
+    // Skip when usage_metadata already populated the values.
+    if (!usageMetadata && output.llmOutput?.tokenUsage) {
       const usage = output.llmOutput.tokenUsage;
       if (usage.promptTokens || usage.promptTokens === 0) {
         span.setAttribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, usage.promptTokens);
@@ -533,6 +580,28 @@ export class TraceloopCallbackHandler extends BaseCallbackHandler {
     return model && typeof model === "string" ? model : undefined;
   }
 
+  // langchain-core's UsageMetadata lives on each AIMessage (generation.message).
+  // Returns the first one, across all generations, that carries at least one
+  // numeric token count; an empty or malformed usage_metadata is skipped so it
+  // cannot suppress the llmOutput.usage / llmOutput.tokenUsage fallbacks.
+  private extractUsageMetadataFromGenerations(output: LLMResult): {
+    input_tokens?: number;
+    output_tokens?: number;
+    total_tokens?: number;
+    input_token_details?: { cache_read?: number; cache_creation?: number };
+  } | null {
+    for (const group of output.generations ?? []) {
+      for (const gen of group ?? []) {
+        const usageMetadata = (gen as any)?.message?.usage_metadata;
+        if (!usageMetadata || typeof usageMetadata !== "object") continue;
+        const { input_tokens, output_tokens, total_tokens } = usageMetadata;
+        const counts = [input_tokens, output_tokens, total_tokens];
+        if (counts.some((n) => typeof n === "number")) return usageMetadata;
+      }
+    }
+    return null;
+  }
+
   private extractResponseId(output: LLMResult): string | null {
     // Providers may expose response ID in llmOutput (e.g., OpenAI's chatcmpl-xxx)
     if (output.llmOutput) {
diff --git a/packages/instrumentation-langchain/test/cache-token-fold-in.test.ts b/packages/instrumentation-langchain/test/cache-token-fold-in.test.ts
new file mode 100644
index 00000000..909db6fb
--- /dev/null
+++ b/packages/instrumentation-langchain/test/cache-token-fold-in.test.ts
@@ -0,0 +1,205 @@
+/*
+ * Copyright Traceloop
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import * as assert from "assert";
+import {
+  NodeTracerProvider,
+  InMemorySpanExporter,
+  SimpleSpanProcessor,
+} from "@opentelemetry/sdk-trace-node";
+import { AIMessage } from "@langchain/core/messages";
+import { LLMResult } from "@langchain/core/outputs";
+import { Serialized } from "@langchain/core/load/serializable";
+import { SpanAttributes } from "@traceloop/ai-semantic-conventions";
+import {
+  ATTR_GEN_AI_USAGE_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+} from "@opentelemetry/semantic-conventions/incubating";
+
+import { TraceloopCallbackHandler } from "../src/callback_handler";
+
+// Per OTel GenAI semconv, cache_read.input_tokens and cache_creation.input_tokens
+// SHOULD be included in gen_ai.usage.input_tokens (subset semantics).
+//
+// langchain-core's UsageMetadata contract documents input_tokens as
+// "Sum of all input token types" — so it is already subset-correct.
+// The handler must (1) emit cache_read / cache_creation attrs when present in
+// usage_metadata.input_token_details, and (2) not double-count.
+
+describe("LangChain cache token emission from usage_metadata", () => {
+  const exporter = new InMemorySpanExporter();
+  const provider = new NodeTracerProvider({
+    spanProcessors: [new SimpleSpanProcessor(exporter)],
+  });
+  const tracer = provider.getTracer("test");
+  const handler = new TraceloopCallbackHandler(tracer, true);
+
+  afterEach(() => exporter.reset());
+
+  const serializedLLM: Serialized = {
+    lc: 1,
+    type: "constructor",
+    id: ["langchain", "chat_models", "anthropic", "ChatAnthropic"],
+    kwargs: {},
+  };
+
+  const runChat = async (output: LLMResult): Promise<void> => {
+    const runId = `run-${Math.random()}`;
+    await handler.handleChatModelStart(serializedLLM, [[]], runId, undefined, {
+      invocation_params: { model: "claude-3-5-sonnet" },
+    });
+    await handler.handleLLMEnd(output, runId);
+  };
+
+  const buildOutput = (usageMetadata: any): LLMResult => ({
+    generations: [
+      [
+        {
+          text: "hello",
+          message: new AIMessage({
+            content: "hello",
+            usage_metadata: usageMetadata,
+          }),
+          generationInfo: { finish_reason: "end_turn" },
+        } as any,
+      ],
+    ],
+  });
+
+  it("emits cache_read + cache_creation from usage_metadata.input_token_details, with input_tokens already a sum", async () => {
+    // Per langchain-core contract, input_tokens=1200 already includes 900 cache_read + 200 cache_creation
+    await runChat(
+      buildOutput({
+        input_tokens: 1200,
+        output_tokens: 50,
+        total_tokens: 1250,
+        input_token_details: {
+          cache_read: 900,
+          cache_creation: 200,
+        },
+      }),
+    );
+
+    const span = exporter.getFinishedSpans().at(-1);
+    assert.ok(span);
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS],
+      1200,
+      "input_tokens preserved from usage_metadata (already subset)",
+    );
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 50);
+    assert.strictEqual(
+      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      1250,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      900,
+      "cache_read.input_tokens should be emitted",
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      200,
+      "cache_creation.input_tokens should be emitted",
+    );
+  });
+
+  it("emits only cache_read when cache_creation is absent", async () => {
+    await runChat(
+      buildOutput({
+        input_tokens: 1000,
+        output_tokens: 50,
+        total_tokens: 1050,
+        input_token_details: {
+          cache_read: 900,
+        },
+      }),
+    );
+
+    const span = exporter.getFinishedSpans().at(-1);
+    assert.ok(span);
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      900,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      undefined,
+    );
+  });
+
+  it("does not emit cache attributes when input_token_details is absent", async () => {
+    await runChat(
+      buildOutput({
+        input_tokens: 100,
+        output_tokens: 50,
+        total_tokens: 150,
+      }),
+    );
+
+    const span = exporter.getFinishedSpans().at(-1);
+    assert.ok(span);
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 100);
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      undefined,
+    );
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS],
+      undefined,
+    );
+  });
+
+  it("falls back to llmOutput.tokenUsage when usage_metadata is absent (backwards compat)", async () => {
+    const runId = `run-${Math.random()}`;
+    await handler.handleChatModelStart(serializedLLM, [[]], runId, undefined, {
+      invocation_params: { model: "gpt-4" },
+    });
+    await handler.handleLLMEnd(
+      {
+        generations: [
+          [
+            {
+              text: "hello",
+              message: new AIMessage({ content: "hello" }),
+              generationInfo: { finish_reason: "stop" },
+            } as any,
+          ],
+        ],
+        llmOutput: {
+          tokenUsage: {
+            promptTokens: 1000,
+            completionTokens: 50,
+            totalTokens: 1050,
+          },
+        },
+      },
+      runId,
+    );
+
+    const span = exporter.getFinishedSpans().at(-1);
+    assert.ok(span);
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_INPUT_TOKENS], 1000);
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_OUTPUT_TOKENS], 50);
+    assert.strictEqual(
+      span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS],
+      1050,
+    );
+  });
+});
diff --git a/packages/instrumentation-openai/src/instrumentation.ts b/packages/instrumentation-openai/src/instrumentation.ts
index f3f8d500..d1f1e9e9 100644
--- a/packages/instrumentation-openai/src/instrumentation.ts
+++ b/packages/instrumentation-openai/src/instrumentation.ts
@@ -48,6 +48,7 @@ import {
   ATTR_GEN_AI_OUTPUT_MESSAGES,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
   ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
   ATTR_GEN_AI_TOOL_DEFINITIONS,
   GEN_AI_OPERATION_NAME_VALUE_CHAT,
@@ -113,6 +114,9 @@ interface ResponsesResult {
     input_tokens?: number;
     output_tokens?: number;
     total_tokens?: number;
+    input_tokens_details?: {
+      cached_tokens?: number;
+    };
   };
 }
 import {
@@ -965,6 +969,13 @@ export class OpenAIInstrumentation extends InstrumentationBase {
             totalTokens,
           );
         }
+        const cachedTokens = result.usage.input_tokens_details?.cached_tokens;
+        if (cachedTokens) {
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            cachedTokens,
+          );
+        }
       }
 
       const outputMessages = buildOpenAIResponsesOutputMessage(result);
diff --git a/packages/instrumentation-openai/test/instrumentation.test.ts b/packages/instrumentation-openai/test/instrumentation.test.ts
index 2310fa96..70c7f780 100644
--- a/packages/instrumentation-openai/test/instrumentation.test.ts
+++ b/packages/instrumentation-openai/test/instrumentation.test.ts
@@ -43,6 +43,7 @@ import {
   ATTR_GEN_AI_OUTPUT_MESSAGES,
   ATTR_GEN_AI_USAGE_INPUT_TOKENS,
   ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
   ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
 } from "@opentelemetry/semantic-conventions/incubating";
 
@@ -232,6 +233,23 @@ describe("Test OpenAI instrumentation", async function () {
     ]);
   });
 
+  it("should set cache read input tokens in span for responses with cached tokens", async () => {
+    const response = await openai.responses.create({
+      model: "gpt-4o-mini",
+      input: "Tell me a joke about OpenTelemetry",
+    });
+    // Pick the chat span out of everything the in-memory exporter collected.
+    const chatSpan = memoryExporter
+      .getFinishedSpans()
+      .find((finished) => finished.name.startsWith("chat "));
+    assert.ok(response);
+    assert.ok(chatSpan);
+    assert.strictEqual(
+      chatSpan.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      13,
+    );
+  });
+
   it("should set attributes in span for streaming chat", async () => {
     const stream = await openai.chat.completions.create({
       messages: [
diff --git a/packages/instrumentation-openai/test/recordings/Test-OpenAI-instrumentation_1770406427/should-set-cache-read-input-tokens-in-span-for-responses-with-cached-tokens_3930499540/recording.har b/packages/instrumentation-openai/test/recordings/Test-OpenAI-instrumentation_1770406427/should-set-cache-read-input-tokens-in-span-for-responses-with-cached-tokens_3930499540/recording.har
new file mode 100644
index 00000000..46bfe23f
--- /dev/null
+++ b/packages/instrumentation-openai/test/recordings/Test-OpenAI-instrumentation_1770406427/should-set-cache-read-input-tokens-in-span-for-responses-with-cached-tokens_3930499540/recording.har
@@ -0,0 +1,236 @@
+{
+  "log": {
+    "_recordingName": "Test OpenAI instrumentation/should set cache read input tokens in span for responses with cached tokens",
+    "creator": {
+      "comment": "persister:fs",
+      "name": "Polly.JS",
+      "version": "6.0.6"
+    },
+    "entries": [
+      {
+        "_id": "c94800faf5cc7c077d6bd96c5e4196b9",
+        "_order": 0,
+        "cache": {},
+        "request": {
+          "bodySize": 68,
+          "cookies": [],
+          "headers": [
+            {
+              "_fromType": "array",
+              "name": "accept",
+              "value": "application/json"
+            },
+            {
+              "_fromType": "array",
+              "name": "content-type",
+              "value": "application/json"
+            },
+            {
+              "_fromType": "array",
+              "name": "user-agent",
+              "value": "OpenAI/JS 6.32.0"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-arch",
+              "value": "arm64"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-lang",
+              "value": "js"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-os",
+              "value": "MacOS"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-package-version",
+              "value": "6.32.0"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-retry-count",
+              "value": "0"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-runtime",
+              "value": "node"
+            },
+            {
+              "_fromType": "array",
+              "name": "x-stainless-runtime-version",
+              "value": "v22.22.1"
+            },
+            {
+              "_fromType": "array",
+              "name": "content-length",
+              "value": "68"
+            },
+            {
+              "_fromType": "array",
+              "name": "accept-encoding",
+              "value": "gzip,deflate"
+            },
+            {
+              "name": "host",
+              "value": "api.openai.com"
+            }
+          ],
+          "headersSize": 603,
+          "httpVersion": "HTTP/1.1",
+          "method": "POST",
+          "postData": {
+            "mimeType": "application/json",
+            "params": [],
+            "text": "{\"model\":\"gpt-4o-mini\",\"input\":\"Tell me a joke about OpenTelemetry\"}"
+          },
+          "queryString": [],
+          "url": "https://api.openai.com/v1/responses"
+        },
+        "response": {
+          "bodySize": 683,
+          "content": {
+            "encoding": "base64",
+            "mimeType": "application/json",
+            "size": 683,
+            "text": "[\"H4sIAAAAAAAAE3VTTa/aMBD8K6kvvYCUEAqBS6X+gV4q9fCoosXZgB+O7dprWoT4713nAx5Pr7dkPLvrnRlfhWrEVngMrs43FVSrvCyxqhZYFnm+gqIqqqbIy3xZFZtFVW3k+suySIT1uizETNj9K0oaW1gTkDHpEQibGhgv1uvNZrUsinImAgHFwFxpO6eRKUzegzwdvI2G79GCDsiQ0lqZg9hehYMLeq5o8IzaOv6+zR7lzxOWM4HeW6abqPVMtB5/RzTyUjs0oOkitvlMKDOV1w0SKB0mvjKBfJSkeI0J6+BvbSO5SDXZEz4fkLW6lqAfLTrboObrHhzNl3beKaPmi3yxnOfreVGJgeAhjZhKhu5i+3IdrOjC4f9OLFbY5MmJTYm4BMBV1W4kypZb08VhqscQ4JBs+FBuaQ2hGeaNFdN++JeYAMZYglGEl18zoe3Bebsf/3rWVvw8XrJGNRkdMfvO8v5AjR2SZ3RyKttzDE5ZdNkfRcfEVD4D57SSffuvO7Mz31BCDJgpyl5joEzaqBvzmbIjmEZj338nyIPEnchsO7bR9oyfxI3v461OO0AIivflxRLowLMrqJ8cYm85W45zypnAp0wweFY2hnoKcZ2sGPzh3TtH3EQesT7h5UPcYxK1d5VjVHfYWX9hMVmBYM2Y5V76pN7QANvW+vtfiF0HfuzOGQ/QIl34Hqlvq/Ce6oD+rPj+pMaH0ULU1NttPU5rEnYuBS0mqJhsuwoe2UH/NZrfH/C8M/q9DSoJwhFqVOwSOuh3tDwwiRzJigGc0mBd/chHPgBumOijkWPSRaMC7HUfwNinky+gzNtXlV7vW+TxOFm4pHHzoJa3mXj3KBerd9Db+rsLd3re70ag78gyQZzEu86cZmiAgBvcbv8AWeFv5icFAAA=\"]"
+          },
+          "cookies": [
+            {
+              "domain": "api.openai.com",
+              "expires": "2026-05-28T10:58:34.000Z",
+              "httpOnly": true,
+              "name": "__cf_bm",
+              "path": "/",
+              "sameSite": "None",
+              "secure": true,
+              "value": "02CMVzrPPcxsao2auj7Q.td6y5xqLQct1DB7tKaLl4k-1779964112.2245798-1.0.1.1-_860cJD87skJAAopXuTNqG3gDJwgmToosZSk2A2M0buYkEKbkrrcoTevp7KzXDybXITcuQ1e_I4qK28SXaCjtL9YCEQ32342Qfs9dcXfV4a92acSpN1WqeSWqGV1k9lG"
+            }
+          ],
+          "headers": [
+            {
+              "name": "date",
+              "value": "Thu, 28 May 2026 10:28:34 GMT"
+            },
+            {
+              "name": "content-type",
+              "value": "application/json"
+            },
+            {
+              "name": "transfer-encoding",
+              "value": "chunked"
+            },
+            {
+              "name": "connection",
+              "value": "keep-alive"
+            },
+            {
+              "name": "cf-ray",
+              "value": "a02c92b568e47a08-TLV"
+            },
+            {
+              "name": "cf-cache-status",
+              "value": "DYNAMIC"
+            },
+            {
+              "name": "server",
+              "value": "cloudflare"
+            },
+            {
+              "name": "strict-transport-security",
+              "value": "max-age=31536000; includeSubDomains; preload"
+            },
+            {
+              "name": "x-content-type-options",
+              "value": "nosniff"
+            },
+            {
+              "name": "access-control-expose-headers",
+              "value": "X-Request-ID, CF-Ray, CF-Ray"
+            },
+            {
+              "name": "openai-organization",
+              "value": "traceloop"
+            },
+            {
+              "name": "openai-processing-ms",
+              "value": "1884"
+            },
+            {
+              "name": "openai-project",
+              "value": "proj_tzz1TbPPOXaf6j9tEkVUBIAa"
+            },
+            {
+              "name": "openai-version",
+              "value": "2020-10-01"
+            },
+            {
+              "name": "x-ratelimit-limit-requests",
+              "value": "30000"
+            },
+            {
+              "name": "x-ratelimit-limit-tokens",
+              "value": "150000000"
+            },
+            {
+              "name": "x-ratelimit-remaining-requests",
+              "value": "29999"
+            },
+            {
+              "name": "x-ratelimit-remaining-tokens",
+              "value": "149999965"
+            },
+            {
+              "name": "x-ratelimit-reset-requests",
+              "value": "2ms"
+            },
+            {
+              "name": "x-ratelimit-reset-tokens",
+              "value": "0s"
+            },
+            {
+              "name": "x-request-id",
+              "value": "req_d316447754b2412ea3db6327c8d438b3"
+            },
+            {
+              "_fromType": "array",
+              "name": "set-cookie",
+              "value": "__cf_bm=02CMVzrPPcxsao2auj7Q.td6y5xqLQct1DB7tKaLl4k-1779964112.2245798-1.0.1.1-_860cJD87skJAAopXuTNqG3gDJwgmToosZSk2A2M0buYkEKbkrrcoTevp7KzXDybXITcuQ1e_I4qK28SXaCjtL9YCEQ32342Qfs9dcXfV4a92acSpN1WqeSWqGV1k9lG; HttpOnly; SameSite=None; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 28 May 2026 10:58:34 GMT"
+            },
+            {
+              "name": "content-encoding",
+              "value": "gzip"
+            },
+            {
+              "name": "alt-svc",
+              "value": "h3=\":443\"; ma=86400"
+            }
+          ],
+          "headersSize": 1146,
+          "httpVersion": "HTTP/1.1",
+          "redirectURL": "",
+          "status": 200,
+          "statusText": "OK"
+        },
+        "startedDateTime": "2026-05-28T10:28:31.988Z",
+        "time": 2834,
+        "timings": {
+          "blocked": -1,
+          "connect": -1,
+          "dns": -1,
+          "receive": 0,
+          "send": 0,
+          "ssl": -1,
+          "wait": 2834
+        },
+        "_recordingName": "Test OpenAI instrumentation/should set cache read input tokens in span for responses with cached tokens"
+      }
+    ],
+    "pages": [],
+    "version": "1.2"
+  }
+}
diff --git a/packages/instrumentation-together/package.json b/packages/instrumentation-together/package.json
index eb97c0e1..7ffe39e0 100644
--- a/packages/instrumentation-together/package.json
+++ b/packages/instrumentation-together/package.json
@@ -40,7 +40,7 @@
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/core": "^2.0.1",
     "@opentelemetry/instrumentation": "^0.219.0",
-    "@opentelemetry/semantic-conventions": "^1.38.0",
+    "@opentelemetry/semantic-conventions": "^1.40.0",
     "@traceloop/ai-semantic-conventions": "workspace:*",
     "js-tiktoken": "^1.0.20",
     "tslib": "^2.8.1"
diff --git a/packages/instrumentation-together/src/instrumentation.ts b/packages/instrumentation-together/src/instrumentation.ts
index e0ae58d4..b40e537f 100644
--- a/packages/instrumentation-together/src/instrumentation.ts
+++ b/packages/instrumentation-together/src/instrumentation.ts
@@ -36,6 +36,7 @@ import {
   ATTR_GEN_AI_SYSTEM,
   ATTR_GEN_AI_USAGE_COMPLETION_TOKENS,
   ATTR_GEN_AI_USAGE_PROMPT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
 } from "@opentelemetry/semantic-conventions/incubating";
 import { TogetherAIInstrumentationConfig } from "./types";
 import type { Completion } from "together-ai/resources";
@@ -521,6 +522,15 @@ export class TogetherInstrumentation extends InstrumentationBase {
           ATTR_GEN_AI_USAGE_PROMPT_TOKENS,
           result.usage?.prompt_tokens,
         );
+        const cachedTokens = (
+          result.usage as unknown as Record<string, unknown>
+        ).cached_tokens; // read via a loose cast; presumably the SDK usage type does not declare cached_tokens — TODO confirm
+        if (typeof cachedTokens === "number") { // any numeric value is recorded, including 0; non-numbers are skipped
+          span.setAttribute(
+            ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            cachedTokens,
+          );
+        }
       }
 
       if (this._shouldSendPrompts()) {
diff --git a/packages/instrumentation-together/test/instrumentation.test.ts b/packages/instrumentation-together/test/instrumentation.test.ts
index bcc63a5d..28907188 100644
--- a/packages/instrumentation-together/test/instrumentation.test.ts
+++ b/packages/instrumentation-together/test/instrumentation.test.ts
@@ -37,6 +37,7 @@ import {
   ATTR_GEN_AI_PROMPT,
   ATTR_GEN_AI_USAGE_COMPLETION_TOKENS,
   ATTR_GEN_AI_USAGE_PROMPT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
 } from "@opentelemetry/semantic-conventions/incubating";
 
 const memoryExporter = new InMemorySpanExporter();
@@ -583,4 +584,52 @@ describe("Test Together instrumentation", async function () {
       { location: "Chicago, IL", unit: "fahrenheit" },
     );
   });
+
+  it("should set cache read input tokens in span when cached_tokens is present", async function () {
+    const { server } = this.polly as Polly;
+    server
+      .post("https://api.together.xyz/v1/chat/completions")
+      .intercept((_req, res) => { // reply with the canned completion below
+        res.status(200).json({
+          id: "together-cache-test-001",
+          object: "chat.completion",
+          created: 1700000000,
+          model: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+          choices: [
+            {
+              index: 0,
+              message: {
+                role: "assistant",
+                content: "North, South, East, West.",
+              },
+              finish_reason: "stop",
+            },
+          ],
+          usage: {
+            prompt_tokens: 20,
+            completion_tokens: 8,
+            total_tokens: 28,
+            cached_tokens: 12, // expected on the cache-read attribute asserted at the end of this test
+          },
+        });
+      });
+
+    await together.chat.completions.create({
+      model: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+      messages: [
+        { role: "user", content: "What are the 4 cardinal directions?" },
+      ],
+    });
+
+    const spans = memoryExporter.getFinishedSpans();
+    const span = spans.find((s) => s.name === "together.chat");
+
+    assert.ok(span);
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_PROMPT_TOKENS], 20); // usage.prompt_tokens from the canned body
+    assert.strictEqual(span.attributes[ATTR_GEN_AI_USAGE_COMPLETION_TOKENS], 8); // usage.completion_tokens from the canned body
+    assert.strictEqual(
+      span.attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      12,
+    );
+  });
 });
diff --git a/packages/instrumentation-vertexai/package.json b/packages/instrumentation-vertexai/package.json
index 463d1304..f80cbe8a 100644
--- a/packages/instrumentation-vertexai/package.json
+++ b/packages/instrumentation-vertexai/package.json
@@ -40,7 +40,7 @@
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/core": "^2.0.1",
     "@opentelemetry/instrumentation": "^0.219.0",
-    "@opentelemetry/semantic-conventions": "^1.38.0",
+    "@opentelemetry/semantic-conventions": "^1.40.0",
     "@traceloop/ai-semantic-conventions": "workspace:*",
     "google-gax": "^4.0.0",
     "tslib": "^2.8.1"
diff --git a/packages/instrumentation-vertexai/src/vertexai-instrumentation.ts b/packages/instrumentation-vertexai/src/vertexai-instrumentation.ts
index 22a432f0..0a8ecc37 100644
--- a/packages/instrumentation-vertexai/src/vertexai-instrumentation.ts
+++ b/packages/instrumentation-vertexai/src/vertexai-instrumentation.ts
@@ -43,6 +43,7 @@ import {
   ATTR_GEN_AI_SYSTEM,
   ATTR_GEN_AI_USAGE_COMPLETION_TOKENS,
   ATTR_GEN_AI_USAGE_PROMPT_TOKENS,
+  ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
 } from "@opentelemetry/semantic-conventions/incubating";
 import type * as vertexAI from "@google-cloud/vertexai";
 import { version } from "../package.json";
@@ -252,6 +253,12 @@ export class VertexAIInstrumentation extends InstrumentationBase {
           streamResponse.usageMetadata.promptTokenCount,
         );
 
+      if (streamResponse.usageMetadata?.cachedContentTokenCount != null)
+        span.setAttribute(
+          ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, // set whenever a count is reported, including 0 (no cache hit)
+          streamResponse.usageMetadata.cachedContentTokenCount,
+        );
+
       if (this._shouldSendPrompts()) {
         streamResponse.candidates?.forEach((candidate, index) => {
           if (candidate.finishReason)
diff --git a/packages/instrumentation-vertexai/tests/gemini.test.ts b/packages/instrumentation-vertexai/tests/gemini.test.ts
index 645a7478..032fd47a 100644
--- a/packages/instrumentation-vertexai/tests/gemini.test.ts
+++ b/packages/instrumentation-vertexai/tests/gemini.test.ts
@@ -24,6 +24,7 @@ import {
   SimpleSpanProcessor,
 } from "@opentelemetry/sdk-trace-base";
 import type * as vertexAiImport from "@google-cloud/vertexai";
+import { ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS } from "@opentelemetry/semantic-conventions/incubating";
 
 const memoryExporter = new InMemorySpanExporter();
 
@@ -155,3 +156,86 @@ describe.skip("Test Gemini GenerativeModel Instrumentation", () => {
     });
   });
 });
+
+describe("Test VertexAI cache token instrumentation", () => {
+  const cacheExporter = new InMemorySpanExporter(); // exporter used only by this suite (separate from the module-level memoryExporter)
+  const cacheProvider = new BasicTracerProvider({
+    spanProcessors: [new SimpleSpanProcessor(cacheExporter)],
+  });
+  let cacheInstrumentation: VertexAIInstrumentation;
+  let contextManager: AsyncHooksContextManager;
+
+  const mockResponse = { // canned response that both mock model methods resolve to
+    candidates: [
+      {
+        content: {
+          role: "model",
+          parts: [{ text: "North, South, East, West." }],
+        },
+        finishReason: "STOP",
+      },
+    ],
+    usageMetadata: {
+      promptTokenCount: 10,
+      candidatesTokenCount: 7,
+      totalTokenCount: 17,
+      cachedContentTokenCount: 5, // value the test expects on the cache-read attribute
+    },
+  };
+
+  class MockGenerativeModel { // minimal stand-in passed to manuallyInstrument as GenerativeModel
+    model = "gemini-1.5-pro";
+    generationConfig = {};
+    async generateContent(_request: unknown) {
+      return { response: Promise.resolve(mockResponse) };
+    }
+    async generateContentStream(_request: unknown) {
+      return { response: Promise.resolve(mockResponse) };
+    }
+  }
+
+  before(() => {
+    cacheInstrumentation = new VertexAIInstrumentation();
+    cacheInstrumentation.setTracerProvider(cacheProvider);
+    cacheInstrumentation.manuallyInstrument({ // hand over the mock class cast to the SDK module type
+      GenerativeModel: MockGenerativeModel,
+    } as unknown as typeof vertexAiImport);
+  });
+
+  after(async () => {
+    await cacheProvider.forceFlush();
+    cacheInstrumentation.disable();
+  });
+
+  beforeEach(() => {
+    contextManager = new AsyncHooksContextManager().enable();
+    context.setGlobalContextManager(contextManager);
+  });
+
+  afterEach(() => {
+    cacheExporter.reset(); // clear collected spans so tests do not see each other's output
+    context.disable(); // remove the global context manager installed in beforeEach
+  });
+
+  it("should set cache read input tokens in span when cachedContentTokenCount is present", async () => {
+    const model = new MockGenerativeModel();
+
+    await model.generateContent({
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: "What are the 4 cardinal directions?" }],
+        },
+      ],
+    });
+
+    const spans = cacheExporter.getFinishedSpans();
+    assert.ok(spans.length > 0, "expected at least one span");
+    const attributes = spans[0].attributes; // only the first finished span is inspected
+
+    assert.strictEqual(
+      attributes[ATTR_GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS],
+      5,
+    );
+  });
+});
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 6988441f..9dd62002 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -733,8 +733,8 @@ importers:
         specifier: ^0.219.0
         version: 0.219.0(@opentelemetry/api@1.9.0)(supports-color@10.0.0)
       '@opentelemetry/semantic-conventions':
-        specifier: ^1.38.0
-        version: 1.38.0
+        specifier: ^1.40.0
+        version: 1.40.0
       '@traceloop/ai-semantic-conventions':
         specifier: workspace:*
         version: link:../ai-semantic-conventions
@@ -794,8 +794,8 @@ importers:
         specifier: ^0.219.0
         version: 0.219.0(@opentelemetry/api@1.9.0)(supports-color@10.0.0)
       '@opentelemetry/semantic-conventions':
-        specifier: ^1.38.0
-        version: 1.38.0
+        specifier: ^1.40.0
+        version: 1.40.0
       '@traceloop/ai-semantic-conventions':
         specifier: workspace:*
         version: link:../ai-semantic-conventions