diff --git a/instrumentation/opentelemetry-instrumentation-google-genai/.changelog/24.added b/instrumentation/opentelemetry-instrumentation-google-genai/.changelog/24.added new file mode 100644 index 00000000..85eacdc8 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-google-genai/.changelog/24.added @@ -0,0 +1 @@ +`opentelemetry-instrumentation-google-genai`: Add `GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY` for attaching caller-supplied attributes that are emitted only on the `gen_ai.client.inference.operation.details` log event and never on the `generate_content {model}` span. On key collisions with `GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY`, the event-only value wins on the event. diff --git a/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/__init__.py b/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/__init__.py index 4c2fae2c..2ebae101 100644 --- a/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/__init__.py @@ -30,12 +30,16 @@ --- """ -from .generate_content import GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY +from .generate_content import ( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, +) from .instrumentor import GoogleGenAiSdkInstrumentor from .version import __version__ __all__ = [ "GoogleGenAiSdkInstrumentor", "GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY", + "GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY", "__version__", ] diff --git a/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py 
b/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index 5d0541b6..49181e82 100644 --- a/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -101,6 +101,20 @@ "generate_content_extra_attributes_context_key" ) +# Attributes attached under this context key are emitted only on the +# `gen_ai.client.inference.operation.details` log event; they are NEVER +# attached to the `generate_content {model}` span. Use this for caller-supplied +# attributes that must not land on broadly-sampled spans -- for example, an +# end-user identifier that is acceptable in telemetry log events but +# undesirable on spans. On key collisions with values supplied via +# ``GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY``, the event-only value wins +# on the event. 
+GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY = ( + context_api.create_key( + "generate_content_event_only_extra_attributes_context_key" + ) +) + class _MethodsSnapshot: def __init__(self): @@ -495,6 +509,15 @@ def _get_extra_generate_content_attributes() -> dict[str, AttributeValue]: return dict(attrs or {}) +def _get_event_only_extra_generate_content_attributes() -> dict[ + str, AttributeValue +]: + attrs = context_api.get_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY + ) + return dict(attrs or {}) + + class _GenerateContentInstrumentationHelper: def __init__( self, @@ -750,6 +773,9 @@ def _maybe_log_completion_details( candidates: list[Candidate], config: Optional[GenerateContentConfigOrDict] = None, tool_definitions: Optional[list[ToolDefinition]] = None, + event_only_extra_attributes: Optional[ + dict[str, AttributeValue] + ] = None, ): if not self.experimental_sem_convs_enabled: return @@ -763,9 +789,16 @@ def _maybe_log_completion_details( ) output_messages = to_output_messages(candidates=candidates) span = trace.get_current_span() + # event_only_extra_attributes win on the event when colliding with + # extra_attributes (caller-supplied), but instrumentation-owned + # request_attributes/final_attributes (semconv fields) always take + # precedence so callers cannot accidentally clobber them. Event-only + # attributes are also intentionally NOT set on the span (the wrappers + # leave them out of their span.set_attributes() call). 
event = LogRecord( event_name="gen_ai.client.inference.operation.details", attributes=extra_attributes + | (event_only_extra_attributes or {}) | request_attributes | final_attributes, ) @@ -1032,6 +1065,10 @@ def instrumented_generate_content( model, "google.genai.Models.generate_content" ) as span: extra_attributes = _get_extra_generate_content_attributes() + event_only_extra_attributes = ( + _get_event_only_extra_generate_content_attributes() + ) + # event_only_extra_attributes are intentionally excluded from the span. span.set_attributes(extra_attributes | request_attributes) if not helper.experimental_sem_convs_enabled: helper.process_request(contents, config, span) @@ -1068,6 +1105,7 @@ def instrumented_generate_content( candidates, config, maybe_tool_definitions, + event_only_extra_attributes=event_only_extra_attributes, ) helper._record_token_usage_metric() helper._record_duration_metric() @@ -1109,6 +1147,10 @@ def instrumented_generate_content_stream( model, "google.genai.Models.generate_content_stream" ) as span: extra_attributes = _get_extra_generate_content_attributes() + event_only_extra_attributes = ( + _get_event_only_extra_generate_content_attributes() + ) + # event_only_extra_attributes are intentionally excluded from the span. 
span.set_attributes(extra_attributes | request_attributes) if not helper.experimental_sem_convs_enabled: helper.process_request(contents, config, span) @@ -1145,6 +1187,7 @@ def instrumented_generate_content_stream( candidates, config, maybe_tool_definitions, + event_only_extra_attributes=event_only_extra_attributes, ) helper._record_token_usage_metric() helper._record_duration_metric() @@ -1186,6 +1229,10 @@ async def instrumented_generate_content( model, "google.genai.AsyncModels.generate_content" ) as span: extra_attributes = _get_extra_generate_content_attributes() + event_only_extra_attributes = ( + _get_event_only_extra_generate_content_attributes() + ) + # event_only_extra_attributes are intentionally excluded from the span. span.set_attributes(extra_attributes | request_attributes) if not helper.experimental_sem_convs_enabled: helper.process_request(contents, config, span) @@ -1221,6 +1268,7 @@ async def instrumented_generate_content( candidates, config, maybe_tool_definitions, + event_only_extra_attributes=event_only_extra_attributes, ) helper._record_token_usage_metric() helper._record_duration_metric() @@ -1264,6 +1312,10 @@ async def instrumented_generate_content_stream( end_on_exit=False, ) as span: extra_attributes = _get_extra_generate_content_attributes() + event_only_extra_attributes = ( + _get_event_only_extra_generate_content_attributes() + ) + # event_only_extra_attributes are intentionally excluded from the span. 
span.set_attributes(extra_attributes | request_attributes) if not helper.experimental_sem_convs_enabled: helper.process_request(contents, config, span) @@ -1291,6 +1343,7 @@ async def instrumented_generate_content_stream( [], config, maybe_tool_definitions, + event_only_extra_attributes=event_only_extra_attributes, ) helper._record_duration_metric() with trace.use_span(span, end_on_exit=True): @@ -1328,6 +1381,7 @@ async def _response_async_generator_wrapper(): candidates, config, maybe_tool_definitions, + event_only_extra_attributes=event_only_extra_attributes, ) helper._record_token_usage_metric() helper._record_duration_metric() diff --git a/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index 2d160af0..524e37f4 100644 --- a/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ b/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -23,6 +23,7 @@ _StabilityMode, ) from opentelemetry.instrumentation.google_genai import ( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, ) from opentelemetry.semconv._incubating.attributes import gen_ai_attributes @@ -925,6 +926,146 @@ def test_new_semconv_log_has_extra_genai_attributes(self): finally: context_api.detach(tok) + def test_event_only_extra_attributes_not_set_on_span(self): + """event_only_extra_attributes must never appear on the span attributes.""" + self.configure_valid_response(text="Yep, it works!") + tok = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + {"user.id": "user-42"}, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", contents="Does this work?" 
+ ) + span = self.otel.get_span_named( + "generate_content gemini-2.0-flash" + ) + self.assertNotIn("user.id", span.attributes) + finally: + context_api.detach(tok) + + def test_event_only_extra_attributes_set_on_event_only(self): + """event_only_extra_attributes land on the operation-details event but not on the span. + + Also verifies the collision-precedence rule: when a key appears in both + ``GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY`` and + ``GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY``, the + event-only value wins on the event, while the span carries the + ``extra_attributes`` value (event-only is never on the span). + """ + patched_environ = patch.dict( + "os.environ", + { + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "EVENT_ONLY", + "OTEL_SEMCONV_STABILITY_OPT_IN": "gen_ai_latest_experimental", + }, + ) + patched_otel_mapping = patch.dict( + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING, + { + _OpenTelemetryStabilitySignalType.GEN_AI: _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + }, + ) + with patched_environ, patched_otel_mapping: + self.configure_valid_response(text="Yep, it works!") + tok_extra = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, + {"shared.key": "from_extra"}, + ) + ) + tok_event_only = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + { + "user.id": "user-42", + "shared.key": "from_event_only", + }, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", + contents="Does this work?", + ) + + span = self.otel.get_span_named( + "generate_content gemini-2.0-flash" + ) + self.assertNotIn("user.id", span.attributes) + # On the span, only `extra_attributes` contributes the shared key. 
+ self.assertEqual(span.attributes["shared.key"], "from_extra") + + self.otel.assert_has_event_named( + "gen_ai.client.inference.operation.details" + ) + event = self.otel.get_event_named( + "gen_ai.client.inference.operation.details" + ) + self.assertEqual(event.attributes["user.id"], "user-42") + # On the event, event_only wins on the collision. + self.assertEqual( + event.attributes["shared.key"], "from_event_only" + ) + finally: + context_api.detach(tok_event_only) + context_api.detach(tok_extra) + + def test_event_only_extra_attributes_do_not_override_semconv_attributes( + self, + ): + """event_only_extra_attributes must never override instrumentation-owned semconv attributes. + + Callers should not be able to clobber attributes set by the + instrumentation itself (request_attributes / final_attributes) via the + event-only context value, even on the event payload. + """ + patched_environ = patch.dict( + "os.environ", + { + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "EVENT_ONLY", + "OTEL_SEMCONV_STABILITY_OPT_IN": "gen_ai_latest_experimental", + }, + ) + patched_otel_mapping = patch.dict( + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING, + { + _OpenTelemetryStabilitySignalType.GEN_AI: _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + }, + ) + with patched_environ, patched_otel_mapping: + self.configure_valid_response(text="Yep, it works!") + tok_event_only = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + { + # Collide with a final_attributes (semconv) key. + gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: -1, + }, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", + contents="Does this work?", + ) + + event = self.otel.get_event_named( + "gen_ai.client.inference.operation.details" + ) + # The instrumentation-owned semconv value must win, not the + # caller-supplied event-only value. 
+ self.assertNotEqual( + event.attributes[ + gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS + ], + -1, + ) + finally: + context_api.detach(tok_event_only) + def test_records_metrics_data(self): self.configure_valid_response() self.generate_content(model="gemini-2.0-flash", contents="Some input") diff --git a/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py b/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py index 466f1f74..9fe5898c 100644 --- a/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py +++ b/instrumentation/opentelemetry-instrumentation-google-genai/tests/generate_content/streaming_base.py @@ -11,8 +11,10 @@ _StabilityMode, ) from opentelemetry.instrumentation.google_genai import ( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, ) +from opentelemetry.semconv._incubating.attributes import gen_ai_attributes from .base import TestCase @@ -130,3 +132,139 @@ def test_new_semconv_log_has_extra_genai_attributes(self): ) finally: context_api.detach(tok) + + def test_event_only_extra_attributes_not_set_on_span(self): + """event_only_extra_attributes must never appear on the span attributes.""" + self.configure_valid_response(text="Yep, it works!") + tok = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + {"user.id": "user-42"}, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", contents="Does this work?" + ) + span = self.otel.get_span_named( + "generate_content gemini-2.0-flash" + ) + self.assertNotIn("user.id", span.attributes) + finally: + context_api.detach(tok) + + def test_event_only_extra_attributes_set_on_event_only(self): + """event_only_extra_attributes land on the operation-details event but not on the span. 
+ + Also verifies that on key collision with + ``GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY``, the event-only value + wins on the event while the span carries the ``extra_attributes`` value. + """ + patched_environ = patch.dict( + "os.environ", + { + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "EVENT_ONLY", + "OTEL_SEMCONV_STABILITY_OPT_IN": "gen_ai_latest_experimental", + }, + ) + patched_otel_mapping = patch.dict( + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING, + { + _OpenTelemetryStabilitySignalType.GEN_AI: _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + }, + ) + with patched_environ, patched_otel_mapping: + self.configure_valid_response(text="Yep, it works!") + tok_extra = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EXTRA_ATTRIBUTES_CONTEXT_KEY, + {"shared.key": "from_extra"}, + ) + ) + tok_event_only = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + { + "user.id": "user-42", + "shared.key": "from_event_only", + }, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", + contents="Does this work?", + ) + + span = self.otel.get_span_named( + "generate_content gemini-2.0-flash" + ) + self.assertNotIn("user.id", span.attributes) + self.assertEqual(span.attributes["shared.key"], "from_extra") + + self.otel.assert_has_event_named( + "gen_ai.client.inference.operation.details" + ) + event = self.otel.get_event_named( + "gen_ai.client.inference.operation.details" + ) + self.assertEqual(event.attributes["user.id"], "user-42") + self.assertEqual( + event.attributes["shared.key"], "from_event_only" + ) + finally: + context_api.detach(tok_event_only) + context_api.detach(tok_extra) + + def test_event_only_extra_attributes_do_not_override_semconv_attributes( + self, + ): + """event_only_extra_attributes must never override instrumentation-owned semconv attributes. 
+ + Callers should not be able to clobber attributes set by the + instrumentation itself (request_attributes / final_attributes) via the + event-only context value, even on the event payload. + """ + patched_environ = patch.dict( + "os.environ", + { + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "EVENT_ONLY", + "OTEL_SEMCONV_STABILITY_OPT_IN": "gen_ai_latest_experimental", + }, + ) + patched_otel_mapping = patch.dict( + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING, + { + _OpenTelemetryStabilitySignalType.GEN_AI: _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + }, + ) + with patched_environ, patched_otel_mapping: + self.configure_valid_response(text="Yep, it works!") + tok_event_only = context_api.attach( + context_api.set_value( + GENERATE_CONTENT_EVENT_ONLY_EXTRA_ATTRIBUTES_CONTEXT_KEY, + { + # Collide with a final_attributes (semconv) key. + gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS: -1, + }, + ) + ) + try: + self.generate_content( + model="gemini-2.0-flash", + contents="Does this work?", + ) + + event = self.otel.get_event_named( + "gen_ai.client.inference.operation.details" + ) + # The instrumentation-owned semconv value must win, not the + # caller-supplied event-only value. + self.assertNotEqual( + event.attributes[ + gen_ai_attributes.GEN_AI_USAGE_INPUT_TOKENS + ], + -1, + ) + finally: + context_api.detach(tok_event_only)