Skip to content

Commit cf0662d

Browse files
authored
Add new cached token gen ai attribute to Google gen AI instrumentation (#4313)
* Add new cached token gen ai attribute * Add changelog * Address comment
1 parent c2967c4 commit cf0662d

8 files changed

Lines changed: 88 additions & 66 deletions

File tree

instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
- Add `gen_ai.usage.cache_read.input_tokens` attribute to capture cached tokens on spans/events when the experimental sem conv flag is set. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313))
11+
1012
## Version 0.7b0 (2026-02-20)
1113
- Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)).
1214
- Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)).

instrumentation-genai/opentelemetry-instrumentation-google-genai/pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ classifiers = [
3939
"Programming Language :: Python :: 3.14",
4040
]
4141
dependencies = [
42-
"opentelemetry-api ~=1.39",
43-
"opentelemetry-instrumentation >=0.60b0, <2",
44-
"opentelemetry-semantic-conventions >=0.60b0, <2",
42+
"opentelemetry-api ~=1.40",
43+
"opentelemetry-instrumentation >=0.61b0, <2",
44+
"opentelemetry-semantic-conventions >=0.61b0, <2",
4545
"opentelemetry-util-genai >= 0.4b0.dev, <0.5b0",
4646
]
4747

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/flags.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from os import environ
1616
from typing import Union
1717

18-
from opentelemetry.instrumentation._semconv import _StabilityMode
1918
from opentelemetry.util.genai.environment_variables import (
2019
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
2120
)
@@ -24,13 +23,13 @@
2423

2524

2625
def is_content_recording_enabled(
27-
mode: _StabilityMode,
26+
experimental_sem_convs_enabled: bool,
2827
) -> Union[bool, ContentCapturingMode]:
29-
if mode == _StabilityMode.DEFAULT:
30-
capture_content = environ.get(
31-
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false"
32-
)
33-
return capture_content.lower() == "true"
34-
if mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL:
28+
if experimental_sem_convs_enabled:
3529
return get_content_capturing_mode()
36-
raise RuntimeError(f"{mode} mode not supported")
30+
return (
31+
environ.get(
32+
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false"
33+
).lower()
34+
== "true"
35+
)

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -524,12 +524,21 @@ def __init__(
524524
self._finish_reasons_set = set()
525525
self._error_type = None
526526
self._input_tokens = 0
527+
self._cached_tokens = 0
527528
self._output_tokens = 0
528-
self.sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
529+
sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
529530
_OpenTelemetryStabilitySignalType.GEN_AI
530531
)
532+
if sem_conv_opt_in_mode not in {
533+
_StabilityMode.DEFAULT,
534+
_StabilityMode.GEN_AI_LATEST_EXPERIMENTAL,
535+
}:
536+
raise ValueError(f"{sem_conv_opt_in_mode} mode not supported")
537+
self.experimental_sem_convs_enabled = (
538+
sem_conv_opt_in_mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
539+
)
531540
self._content_recording_enabled = is_content_recording_enabled(
532-
self.sem_conv_opt_in_mode
541+
self.experimental_sem_convs_enabled
533542
)
534543
self._response_index = 0
535544
self._candidate_index = 0
@@ -621,6 +630,11 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
621630
output_tokens = _get_response_property(
622631
response, "usage_metadata.candidates_token_count"
623632
)
633+
cached_tokens = _get_response_property(
634+
response, "usage_metadata.cached_content_token_count"
635+
)
636+
if cached_tokens and isinstance(cached_tokens, int):
637+
self._cached_tokens = cached_tokens
624638
if input_tokens and isinstance(input_tokens, int):
625639
self._input_tokens = input_tokens
626640
if output_tokens and isinstance(output_tokens, int):
@@ -654,10 +668,7 @@ def _maybe_update_error_type(self, response: GenerateContentResponse):
654668
self._error_type = f"BLOCKED_{block_reason}"
655669

656670
def _maybe_get_tool_definitions(self, config) -> list[ToolDefinition]:
657-
if (
658-
self.sem_conv_opt_in_mode
659-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
660-
):
671+
if not self.experimental_sem_convs_enabled:
661672
return []
662673

663674
if tools := _config_to_tools(config):
@@ -669,10 +680,7 @@ def _maybe_get_tool_definitions(self, config) -> list[ToolDefinition]:
669680
async def _maybe_get_tool_definitions_async(
670681
self, config
671682
) -> list[ToolDefinition]:
672-
if (
673-
self.sem_conv_opt_in_mode
674-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
675-
):
683+
if not self.experimental_sem_convs_enabled:
676684
return []
677685

678686
tool_definitions = []
@@ -744,10 +752,7 @@ def _maybe_log_completion_details(
744752
config: Optional[GenerateContentConfigOrDict] = None,
745753
tool_definitions: Optional[list[ToolDefinition]] = None,
746754
):
747-
if (
748-
self.sem_conv_opt_in_mode
749-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
750-
):
755+
if not self.experimental_sem_convs_enabled:
751756
return
752757
system_instructions = []
753758
if system_content := _config_to_system_instruction(config):
@@ -758,14 +763,21 @@ def _maybe_log_completion_details(
758763
contents=transformers.t_contents(request)
759764
)
760765
output_messages = to_output_messages(candidates=candidates)
761-
762766
span = trace.get_current_span()
763767
event = LogRecord(
764768
event_name="gen_ai.client.inference.operation.details",
765769
attributes=extra_attributes
766770
| request_attributes
767771
| final_attributes,
768772
)
773+
# New sem conv only gets added here when we've verified that experimental mode is set.
774+
span.set_attribute(
775+
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
776+
self._cached_tokens,
777+
)
778+
event.attributes[
779+
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
780+
] = self._cached_tokens
769781
tool_definitions = tool_definitions or []
770782
self.completion_hook.on_completion(
771783
inputs=input_messages,
@@ -1014,7 +1026,7 @@ def instrumented_generate_content(
10141026
) as span:
10151027
extra_attributes = _get_extra_generate_content_attributes()
10161028
span.set_attributes(extra_attributes | request_attributes)
1017-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1029+
if not helper.experimental_sem_convs_enabled:
10181030
helper.process_request(contents, config, span)
10191031
try:
10201032
response = wrapped_func(
@@ -1024,10 +1036,7 @@ def instrumented_generate_content(
10241036
config=helper.wrapped_config(config),
10251037
**kwargs,
10261038
)
1027-
if (
1028-
helper.sem_conv_opt_in_mode
1029-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1030-
):
1039+
if helper.experimental_sem_convs_enabled:
10311040
helper._update_response(response)
10321041
if response.candidates:
10331042
candidates += response.candidates
@@ -1094,7 +1103,7 @@ def instrumented_generate_content_stream(
10941103
) as span:
10951104
extra_attributes = _get_extra_generate_content_attributes()
10961105
span.set_attributes(extra_attributes | request_attributes)
1097-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1106+
if not helper.experimental_sem_convs_enabled:
10981107
helper.process_request(contents, config, span)
10991108
try:
11001109
for response in wrapped_func(
@@ -1104,10 +1113,7 @@ def instrumented_generate_content_stream(
11041113
config=helper.wrapped_config(config),
11051114
**kwargs,
11061115
):
1107-
if (
1108-
helper.sem_conv_opt_in_mode
1109-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1110-
):
1116+
if helper.experimental_sem_convs_enabled:
11111117
helper._update_response(response)
11121118
if response.candidates:
11131119
candidates += response.candidates
@@ -1174,7 +1180,7 @@ async def instrumented_generate_content(
11741180
) as span:
11751181
extra_attributes = _get_extra_generate_content_attributes()
11761182
span.set_attributes(extra_attributes | request_attributes)
1177-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1183+
if not helper.experimental_sem_convs_enabled:
11781184
helper.process_request(contents, config, span)
11791185
try:
11801186
response = await wrapped_func(
@@ -1184,10 +1190,7 @@ async def instrumented_generate_content(
11841190
config=helper.wrapped_config(config),
11851191
**kwargs,
11861192
)
1187-
if (
1188-
helper.sem_conv_opt_in_mode
1189-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1190-
):
1193+
if helper.experimental_sem_convs_enabled:
11911194
helper._update_response(response)
11921195
if response.candidates:
11931196
candidates += response.candidates
@@ -1255,10 +1258,7 @@ async def instrumented_generate_content_stream(
12551258
) as span:
12561259
extra_attributes = _get_extra_generate_content_attributes()
12571260
span.set_attributes(extra_attributes | request_attributes)
1258-
if (
1259-
not helper.sem_conv_opt_in_mode
1260-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1261-
):
1261+
if not helper.experimental_sem_convs_enabled:
12621262
helper.process_request(contents, config, span)
12631263
try:
12641264
response_async_generator = await wrapped_func(
@@ -1294,10 +1294,7 @@ async def _response_async_generator_wrapper():
12941294
with trace.use_span(span, end_on_exit=True):
12951295
try:
12961296
async for response in response_async_generator:
1297-
if (
1298-
helper.sem_conv_opt_in_mode
1299-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1300-
):
1297+
if helper.experimental_sem_convs_enabled:
13011298
helper._update_response(response)
13021299
if response.candidates:
13031300
candidates += response.candidates

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/tool_call_wrapper.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,17 @@ def _to_otel_attribute(python_value):
8383

8484

8585
def _is_capture_content_enabled() -> bool:
86-
mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
87-
_OpenTelemetryStabilitySignalType.GEN_AI
88-
)
89-
if mode == _StabilityMode.DEFAULT:
90-
return bool(is_content_recording_enabled(mode))
91-
if mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL:
92-
capturing_mode = is_content_recording_enabled(mode)
93-
return capturing_mode in [
86+
if (
87+
_OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
88+
_OpenTelemetryStabilitySignalType.GEN_AI
89+
)
90+
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
91+
):
92+
return is_content_recording_enabled(True) in [
9493
ContentCapturingMode.SPAN_ONLY,
9594
ContentCapturingMode.SPAN_AND_EVENT,
9695
]
97-
raise RuntimeError(f"{mode} mode not supported")
96+
return bool(is_content_recording_enabled(False))
9897

9998

10099
def _create_function_span_name(wrapped_function):

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,18 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):
260260
)
261261

262262
def test_generated_span_counts_tokens(self):
263-
self.configure_valid_response(input_tokens=123, output_tokens=456)
263+
self.configure_valid_response(
264+
input_tokens=123, output_tokens=456, cached_tokens=50
265+
)
264266
self.generate_content(model="gemini-2.0-flash", contents="Some input")
265267
self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
266268
span = self.otel.get_span_named("generate_content gemini-2.0-flash")
267269
self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
268270
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
271+
# New sem conv should not appear when flag is not experimental mode.
272+
self.assertNotIn(
273+
"gen_ai.usage.cache_read.input_tokens", span.attributes
274+
)
269275

270276
@patch.dict(
271277
"os.environ",
@@ -445,7 +451,9 @@ def test_new_semconv_record_completion_as_log(self):
445451
):
446452
self.setUp()
447453
with patched_environ, patched_otel_mapping:
448-
self.configure_valid_response(text=output)
454+
self.configure_valid_response(
455+
text=output, cached_tokens=50
456+
)
449457
self.generate_content(
450458
model="gemini-2.0-flash",
451459
contents=content,
@@ -461,6 +469,12 @@ def test_new_semconv_record_completion_as_log(self):
461469
event = self.otel.get_event_named(
462470
"gen_ai.client.inference.operation.details"
463471
)
472+
self.assertEqual(
473+
event.attributes[
474+
"gen_ai.usage.cache_read.input_tokens"
475+
],
476+
50,
477+
)
464478
assert (
465479
event.attributes[
466480
"gcp.gen_ai.operation.config.response_schema"
@@ -765,7 +779,9 @@ def test_new_semconv_record_completion_in_span(self):
765779
):
766780
self.setUp()
767781
with patched_environ, patched_otel_mapping:
768-
self.configure_valid_response(text="Some response content")
782+
self.configure_valid_response(
783+
text="Some response content", cached_tokens=50
784+
)
769785
self.generate_content(
770786
model="gemini-2.0-flash",
771787
contents="Some input",
@@ -778,6 +794,12 @@ def test_new_semconv_record_completion_in_span(self):
778794
span = self.otel.get_span_named(
779795
"generate_content gemini-2.0-flash"
780796
)
797+
self.assertEqual(
798+
span.attributes[
799+
"gen_ai.usage.cache_read.input_tokens"
800+
],
801+
50,
802+
)
781803
if mode in [
782804
ContentCapturingMode.SPAN_ONLY,
783805
ContentCapturingMode.SPAN_AND_EVENT,

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def create_response(
2626
text: Optional[str] = None,
2727
input_tokens: Optional[int] = None,
2828
output_tokens: Optional[int] = None,
29+
cached_tokens: Optional[int] = None,
2930
model_version: Optional[str] = None,
3031
usage_metadata: Optional[
3132
genai_types.GenerateContentResponseUsageMetadata
@@ -53,6 +54,8 @@ def create_response(
5354
usage_metadata.prompt_token_count = input_tokens
5455
if output_tokens is not None:
5556
usage_metadata.candidates_token_count = output_tokens
57+
if cached_tokens is not None:
58+
usage_metadata.cached_content_token_count = cached_tokens
5659
return genai_types.GenerateContentResponse(
5760
candidates=candidates,
5861
usage_metadata=usage_metadata,

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/requirements.oldest.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ pytest-vcr==1.0.2
2121

2222
google-auth==2.15.0
2323
google-genai==1.32.0
24-
opentelemetry-api==1.39.0
25-
opentelemetry-sdk==1.39.0
26-
opentelemetry-semantic-conventions==0.60b0
27-
opentelemetry-instrumentation==0.60b0
24+
opentelemetry-api==1.40.0
25+
opentelemetry-sdk==1.40.0
26+
opentelemetry-semantic-conventions==0.61b0
27+
opentelemetry-instrumentation==0.61b0
2828
-e util/opentelemetry-util-genai[upload]
2929

3030
fsspec==2025.9.0

0 commit comments

Comments
 (0)