Skip to content

Commit cf0662d

Browse files
authored
Add new cached token gen ai attribute to Google gen AI instrumentation (#4313)
* Add new cached token gen ai attribute * Add changelog * Address comment
1 parent c2967c4 commit cf0662d

8 files changed

Lines changed: 88 additions & 66 deletions

File tree

instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
- Add `gen_ai.usage.cache_read.input_tokens` attribute to capture cached tokens on spans/events when the experimental sem conv flag is set. ([#4313](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4313))
11+
1012
## Version 0.7b0 (2026-02-20)
1113
- Fix bug in how tokens are counted when using the streaming `generateContent` method. ([#4152](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4152)).
1214
- Add `gen_ai.tool.definitions` attribute to `gen_ai.client.inference.operation.details` log event ([#4142](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4142)).

instrumentation-genai/opentelemetry-instrumentation-google-genai/pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ classifiers = [
3939
"Programming Language :: Python :: 3.14",
4040
]
4141
dependencies = [
42-
"opentelemetry-api ~=1.39",
43-
"opentelemetry-instrumentation >=0.60b0, <2",
44-
"opentelemetry-semantic-conventions >=0.60b0, <2",
42+
"opentelemetry-api ~=1.40",
43+
"opentelemetry-instrumentation >=0.61b0, <2",
44+
"opentelemetry-semantic-conventions >=0.61b0, <2",
4545
"opentelemetry-util-genai >= 0.4b0.dev, <0.5b0",
4646
]
4747

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/flags.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from os import environ
1616
from typing import Union
1717

18-
from opentelemetry.instrumentation._semconv import _StabilityMode
1918
from opentelemetry.util.genai.environment_variables import (
2019
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
2120
)
@@ -24,13 +23,13 @@
2423

2524

2625
def is_content_recording_enabled(
27-
mode: _StabilityMode,
26+
experimental_sem_convs_enabled: bool,
2827
) -> Union[bool, ContentCapturingMode]:
29-
if mode == _StabilityMode.DEFAULT:
30-
capture_content = environ.get(
31-
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false"
32-
)
33-
return capture_content.lower() == "true"
34-
if mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL:
28+
if experimental_sem_convs_enabled:
3529
return get_content_capturing_mode()
36-
raise RuntimeError(f"{mode} mode not supported")
30+
return (
31+
environ.get(
32+
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false"
33+
).lower()
34+
== "true"
35+
)

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -524,12 +524,21 @@ def __init__(
524524
self._finish_reasons_set = set()
525525
self._error_type = None
526526
self._input_tokens = 0
527+
self._cached_tokens = 0
527528
self._output_tokens = 0
528-
self.sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
529+
sem_conv_opt_in_mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
529530
_OpenTelemetryStabilitySignalType.GEN_AI
530531
)
532+
if sem_conv_opt_in_mode not in {
533+
_StabilityMode.DEFAULT,
534+
_StabilityMode.GEN_AI_LATEST_EXPERIMENTAL,
535+
}:
536+
raise ValueError(f"{sem_conv_opt_in_mode} mode not supported")
537+
self.experimental_sem_convs_enabled = (
538+
sem_conv_opt_in_mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
539+
)
531540
self._content_recording_enabled = is_content_recording_enabled(
532-
self.sem_conv_opt_in_mode
541+
self.experimental_sem_convs_enabled
533542
)
534543
self._response_index = 0
535544
self._candidate_index = 0
@@ -621,6 +630,11 @@ def _maybe_update_token_counts(self, response: GenerateContentResponse):
621630
output_tokens = _get_response_property(
622631
response, "usage_metadata.candidates_token_count"
623632
)
633+
cached_tokens = _get_response_property(
634+
response, "usage_metadata.cached_content_token_count"
635+
)
636+
if cached_tokens and isinstance(cached_tokens, int):
637+
self._cached_tokens = cached_tokens
624638
if input_tokens and isinstance(input_tokens, int):
625639
self._input_tokens = input_tokens
626640
if output_tokens and isinstance(output_tokens, int):
@@ -654,10 +668,7 @@ def _maybe_update_error_type(self, response: GenerateContentResponse):
654668
self._error_type = f"BLOCKED_{block_reason}"
655669

656670
def _maybe_get_tool_definitions(self, config) -> list[ToolDefinition]:
657-
if (
658-
self.sem_conv_opt_in_mode
659-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
660-
):
671+
if not self.experimental_sem_convs_enabled:
661672
return []
662673

663674
if tools := _config_to_tools(config):
@@ -669,10 +680,7 @@ def _maybe_get_tool_definitions(self, config) -> list[ToolDefinition]:
669680
async def _maybe_get_tool_definitions_async(
670681
self, config
671682
) -> list[ToolDefinition]:
672-
if (
673-
self.sem_conv_opt_in_mode
674-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
675-
):
683+
if not self.experimental_sem_convs_enabled:
676684
return []
677685

678686
tool_definitions = []
@@ -744,10 +752,7 @@ def _maybe_log_completion_details(
744752
config: Optional[GenerateContentConfigOrDict] = None,
745753
tool_definitions: Optional[list[ToolDefinition]] = None,
746754
):
747-
if (
748-
self.sem_conv_opt_in_mode
749-
!= _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
750-
):
755+
if not self.experimental_sem_convs_enabled:
751756
return
752757
system_instructions = []
753758
if system_content := _config_to_system_instruction(config):
@@ -758,14 +763,21 @@ def _maybe_log_completion_details(
758763
contents=transformers.t_contents(request)
759764
)
760765
output_messages = to_output_messages(candidates=candidates)
761-
762766
span = trace.get_current_span()
763767
event = LogRecord(
764768
event_name="gen_ai.client.inference.operation.details",
765769
attributes=extra_attributes
766770
| request_attributes
767771
| final_attributes,
768772
)
773+
# New sem conv only gets added here when we've verified that experimental mode is set.
774+
span.set_attribute(
775+
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
776+
self._cached_tokens,
777+
)
778+
event.attributes[
779+
gen_ai_attributes.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS
780+
] = self._cached_tokens
769781
tool_definitions = tool_definitions or []
770782
self.completion_hook.on_completion(
771783
inputs=input_messages,
@@ -1014,7 +1026,7 @@ def instrumented_generate_content(
10141026
) as span:
10151027
extra_attributes = _get_extra_generate_content_attributes()
10161028
span.set_attributes(extra_attributes | request_attributes)
1017-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1029+
if not helper.experimental_sem_convs_enabled:
10181030
helper.process_request(contents, config, span)
10191031
try:
10201032
response = wrapped_func(
@@ -1024,10 +1036,7 @@ def instrumented_generate_content(
10241036
config=helper.wrapped_config(config),
10251037
**kwargs,
10261038
)
1027-
if (
1028-
helper.sem_conv_opt_in_mode
1029-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1030-
):
1039+
if helper.experimental_sem_convs_enabled:
10311040
helper._update_response(response)
10321041
if response.candidates:
10331042
candidates += response.candidates
@@ -1094,7 +1103,7 @@ def instrumented_generate_content_stream(
10941103
) as span:
10951104
extra_attributes = _get_extra_generate_content_attributes()
10961105
span.set_attributes(extra_attributes | request_attributes)
1097-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1106+
if not helper.experimental_sem_convs_enabled:
10981107
helper.process_request(contents, config, span)
10991108
try:
11001109
for response in wrapped_func(
@@ -1104,10 +1113,7 @@ def instrumented_generate_content_stream(
11041113
config=helper.wrapped_config(config),
11051114
**kwargs,
11061115
):
1107-
if (
1108-
helper.sem_conv_opt_in_mode
1109-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1110-
):
1116+
if helper.experimental_sem_convs_enabled:
11111117
helper._update_response(response)
11121118
if response.candidates:
11131119
candidates += response.candidates
@@ -1174,7 +1180,7 @@ async def instrumented_generate_content(
11741180
) as span:
11751181
extra_attributes = _get_extra_generate_content_attributes()
11761182
span.set_attributes(extra_attributes | request_attributes)
1177-
if helper.sem_conv_opt_in_mode == _StabilityMode.DEFAULT:
1183+
if not helper.experimental_sem_convs_enabled:
11781184
helper.process_request(contents, config, span)
11791185
try:
11801186
response = await wrapped_func(
@@ -1184,10 +1190,7 @@ async def instrumented_generate_content(
11841190
config=helper.wrapped_config(config),
11851191
**kwargs,
11861192
)
1187-
if (
1188-
helper.sem_conv_opt_in_mode
1189-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1190-
):
1193+
if helper.experimental_sem_convs_enabled:
11911194
helper._update_response(response)
11921195
if response.candidates:
11931196
candidates += response.candidates
@@ -1255,10 +1258,7 @@ async def instrumented_generate_content_stream(
12551258
) as span:
12561259
extra_attributes = _get_extra_generate_content_attributes()
12571260
span.set_attributes(extra_attributes | request_attributes)
1258-
if (
1259-
not helper.sem_conv_opt_in_mode
1260-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1261-
):
1261+
if not helper.experimental_sem_convs_enabled:
12621262
helper.process_request(contents, config, span)
12631263
try:
12641264
response_async_generator = await wrapped_func(
@@ -1294,10 +1294,7 @@ async def _response_async_generator_wrapper():
12941294
with trace.use_span(span, end_on_exit=True):
12951295
try:
12961296
async for response in response_async_generator:
1297-
if (
1298-
helper.sem_conv_opt_in_mode
1299-
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
1300-
):
1297+
if helper.experimental_sem_convs_enabled:
13011298
helper._update_response(response)
13021299
if response.candidates:
13031300
candidates += response.candidates

instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/tool_call_wrapper.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,17 @@ def _to_otel_attribute(python_value):
8383

8484

8585
def _is_capture_content_enabled() -> bool:
86-
mode = _OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
87-
_OpenTelemetryStabilitySignalType.GEN_AI
88-
)
89-
if mode == _StabilityMode.DEFAULT:
90-
return bool(is_content_recording_enabled(mode))
91-
if mode == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL:
92-
capturing_mode = is_content_recording_enabled(mode)
93-
return capturing_mode in [
86+
if (
87+
_OpenTelemetrySemanticConventionStability._get_opentelemetry_stability_opt_in_mode(
88+
_OpenTelemetryStabilitySignalType.GEN_AI
89+
)
90+
== _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL
91+
):
92+
return is_content_recording_enabled(True) in [
9493
ContentCapturingMode.SPAN_ONLY,
9594
ContentCapturingMode.SPAN_AND_EVENT,
9695
]
97-
raise RuntimeError(f"{mode} mode not supported")
96+
return bool(is_content_recording_enabled(False))
9897

9998

10099
def _create_function_span_name(wrapped_function):

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,18 @@ def test_generated_span_has_vertex_ai_system_when_configured(self):
260260
)
261261

262262
def test_generated_span_counts_tokens(self):
263-
self.configure_valid_response(input_tokens=123, output_tokens=456)
263+
self.configure_valid_response(
264+
input_tokens=123, output_tokens=456, cached_tokens=50
265+
)
264266
self.generate_content(model="gemini-2.0-flash", contents="Some input")
265267
self.otel.assert_has_span_named("generate_content gemini-2.0-flash")
266268
span = self.otel.get_span_named("generate_content gemini-2.0-flash")
267269
self.assertEqual(span.attributes["gen_ai.usage.input_tokens"], 123)
268270
self.assertEqual(span.attributes["gen_ai.usage.output_tokens"], 456)
271+
# New sem conv should not appear when flag is not experimental mode.
272+
self.assertNotIn(
273+
"gen_ai.usage.cache_read.input_tokens", span.attributes
274+
)
269275

270276
@patch.dict(
271277
"os.environ",
@@ -445,7 +451,9 @@ def test_new_semconv_record_completion_as_log(self):
445451
):
446452
self.setUp()
447453
with patched_environ, patched_otel_mapping:
448-
self.configure_valid_response(text=output)
454+
self.configure_valid_response(
455+
text=output, cached_tokens=50
456+
)
449457
self.generate_content(
450458
model="gemini-2.0-flash",
451459
contents=content,
@@ -461,6 +469,12 @@ def test_new_semconv_record_completion_as_log(self):
461469
event = self.otel.get_event_named(
462470
"gen_ai.client.inference.operation.details"
463471
)
472+
self.assertEqual(
473+
event.attributes[
474+
"gen_ai.usage.cache_read.input_tokens"
475+
],
476+
50,
477+
)
464478
assert (
465479
event.attributes[
466480
"gcp.gen_ai.operation.config.response_schema"
@@ -765,7 +779,9 @@ def test_new_semconv_record_completion_in_span(self):
765779
):
766780
self.setUp()
767781
with patched_environ, patched_otel_mapping:
768-
self.configure_valid_response(text="Some response content")
782+
self.configure_valid_response(
783+
text="Some response content", cached_tokens=50
784+
)
769785
self.generate_content(
770786
model="gemini-2.0-flash",
771787
contents="Some input",
@@ -778,6 +794,12 @@ def test_new_semconv_record_completion_in_span(self):
778794
span = self.otel.get_span_named(
779795
"generate_content gemini-2.0-flash"
780796
)
797+
self.assertEqual(
798+
span.attributes[
799+
"gen_ai.usage.cache_read.input_tokens"
800+
],
801+
50,
802+
)
781803
if mode in [
782804
ContentCapturingMode.SPAN_ONLY,
783805
ContentCapturingMode.SPAN_AND_EVENT,

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/util.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def create_response(
2626
text: Optional[str] = None,
2727
input_tokens: Optional[int] = None,
2828
output_tokens: Optional[int] = None,
29+
cached_tokens: Optional[int] = None,
2930
model_version: Optional[str] = None,
3031
usage_metadata: Optional[
3132
genai_types.GenerateContentResponseUsageMetadata
@@ -53,6 +54,8 @@ def create_response(
5354
usage_metadata.prompt_token_count = input_tokens
5455
if output_tokens is not None:
5556
usage_metadata.candidates_token_count = output_tokens
57+
if cached_tokens is not None:
58+
usage_metadata.cached_content_token_count = cached_tokens
5659
return genai_types.GenerateContentResponse(
5760
candidates=candidates,
5861
usage_metadata=usage_metadata,

instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/requirements.oldest.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ pytest-vcr==1.0.2
2121

2222
google-auth==2.15.0
2323
google-genai==1.32.0
24-
opentelemetry-api==1.39.0
25-
opentelemetry-sdk==1.39.0
26-
opentelemetry-semantic-conventions==0.60b0
27-
opentelemetry-instrumentation==0.60b0
24+
opentelemetry-api==1.40.0
25+
opentelemetry-sdk==1.40.0
26+
opentelemetry-semantic-conventions==0.61b0
27+
opentelemetry-instrumentation==0.61b0
2828
-e util/opentelemetry-util-genai[upload]
2929

3030
fsspec==2025.9.0

0 commit comments

Comments
 (0)