|
14 | 14 |
|
15 | 15 | """Tests for MultiModalConversation instrumentation.""" |
16 | 16 |
|
| 17 | +import json |
| 18 | +from types import SimpleNamespace |
17 | 19 | from typing import Optional |
18 | 20 |
|
19 | 21 | import pytest |
20 | 22 | from dashscope import MultiModalConversation |
21 | 23 |
|
| 24 | +from opentelemetry.instrumentation._semconv import ( |
| 25 | + OTEL_SEMCONV_STABILITY_OPT_IN, |
| 26 | + _OpenTelemetrySemanticConventionStability, |
| 27 | +) |
| 28 | +from opentelemetry.instrumentation.dashscope.utils.multimodal import ( |
| 29 | + _extract_multimodal_output_messages, |
| 30 | + _update_invocation_from_multimodal_response, |
| 31 | +) |
22 | 32 | from opentelemetry.semconv._incubating.attributes import ( |
23 | 33 | gen_ai_attributes as GenAIAttributes, |
24 | 34 | ) |
| 35 | +from opentelemetry.util.genai.environment_variables import ( |
| 36 | + OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, |
| 37 | +) |
| 38 | +from opentelemetry.util.genai.handler import TelemetryHandler |
| 39 | +from opentelemetry.util.genai.types import LLMInvocation, Text, Uri |
| 40 | + |
| 41 | + |
| 42 | +def _make_multimodal_response(content, finish_reason="stop"): |
| 43 | + return SimpleNamespace( |
| 44 | + output=SimpleNamespace( |
| 45 | + choices=[ |
| 46 | + SimpleNamespace( |
| 47 | + message=SimpleNamespace(content=content), |
| 48 | + finish_reason=finish_reason, |
| 49 | + ) |
| 50 | + ] |
| 51 | + ) |
| 52 | + ) |
| 53 | + |
| 54 | + |
@pytest.fixture(scope="function")
def content_capture_env(monkeypatch):
    """Set the semconv opt-in and message-content capture env vars for a test.

    The semantic-convention stability helper's ``_initialized`` flag is
    cleared before the variables are set and ``_initialize()`` is called
    afterwards (presumably so the new values are picked up -- confirm against
    the helper). The flag is cleared again on teardown; the environment
    changes themselves are made through ``monkeypatch``.
    """
    stability = _OpenTelemetrySemanticConventionStability
    overrides = (
        (OTEL_SEMCONV_STABILITY_OPT_IN, "gen_ai_latest_experimental"),
        (OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "SPAN_ONLY"),
    )

    stability._initialized = False
    for env_name, env_value in overrides:
        monkeypatch.setenv(env_name, env_value)
    stability._initialize()

    yield

    # Teardown: leave the helper marked as uninitialized for later tests.
    stability._initialized = False
25 | 67 |
|
26 | 68 |
|
27 | 69 | def _safe_getattr(obj, attr, default=None): |
@@ -120,6 +162,96 @@ def _assert_multimodal_span_attributes( |
120 | 162 | ) |
121 | 163 |
|
122 | 164 |
|
@pytest.mark.parametrize(
    ("content_key", "url", "modality"),
    [
        ("image", "https://example.com/a.png", "image"),
        ("audio", "https://example.com/a.wav", "audio"),
        ("video", "https://example.com/a.mp4", "video"),
    ],
)
def test_extract_multimodal_output_messages_with_uri_content(
    content_key, url, modality
):
    """Check that a single media-URL content item yields one ``Uri`` part.

    Each parametrized case feeds a one-item content list keyed by
    ``content_key`` and expects one assistant message with finish reason
    ``"stop"`` whose only part carries the URL and the given modality.
    """
    response = _make_multimodal_response([{content_key: url}])
    messages = _extract_multimodal_output_messages(response)

    # Message-level expectations.
    assert len(messages) == 1
    message = messages[0]
    assert message.role == "assistant"
    assert message.finish_reason == "stop"
    assert len(message.parts) == 1

    # Part-level expectations.
    uri_part = message.parts[0]
    assert isinstance(uri_part, Uri)
    assert uri_part.uri == url
    assert uri_part.modality == modality
    assert uri_part.mime_type is None
    assert uri_part.type == "uri"
| 192 | + |
| 193 | + |
def test_extract_multimodal_output_messages_with_text_and_image_content():
    """Check that mixed text and image content keeps both parts, in order.

    A content list of one text item followed by one image item is expected to
    produce a single assistant message whose first part is ``Text`` and whose
    second part is a ``Uri`` with image modality.
    """
    image_url = "https://example.com/generated.png"
    response = _make_multimodal_response([{"text": "ok"}, {"image": image_url}])
    messages = _extract_multimodal_output_messages(response)

    assert len(messages) == 1
    message = messages[0]
    assert message.role == "assistant"
    assert message.finish_reason == "stop"
    assert len(message.parts) == 2

    # Safe to unpack: the length was asserted just above.
    text_part, image_part = message.parts[0], message.parts[1]

    assert isinstance(text_part, Text)
    assert text_part.content == "ok"
    assert text_part.type == "text"

    assert isinstance(image_part, Uri)
    assert image_part.uri == image_url
    assert image_part.modality == "image"
    assert image_part.mime_type is None
    assert image_part.type == "uri"
| 217 | + |
| 218 | + |
def test_multimodal_image_output_messages_written_to_span(
    content_capture_env, tracer_provider, span_exporter
):
    """Check that an image output URI lands in ``gen_ai.output.messages``.

    The invocation is updated from a fake image response, then started and
    stopped through a ``TelemetryHandler``. The single finished span's
    output-messages attribute is decoded from JSON and compared with the
    expected payload.
    """
    image_url = "https://example.com/generated.png"
    expected_messages = [
        {
            "role": "assistant",
            "parts": [
                {
                    "mime_type": None,
                    "modality": "image",
                    "uri": image_url,
                    "type": "uri",
                }
            ],
            "finish_reason": "stop",
        }
    ]

    invocation = LLMInvocation(request_model="wan2.7-image")
    invocation.provider = "dashscope"
    _update_invocation_from_multimodal_response(
        invocation, _make_multimodal_response([{"image": image_url}])
    )

    handler = TelemetryHandler(tracer_provider=tracer_provider)
    handler.start_llm(invocation)
    handler.stop_llm(invocation)

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1

    raw_attribute = finished[0].attributes[
        GenAIAttributes.GEN_AI_OUTPUT_MESSAGES
    ]
    output_messages = json.loads(raw_attribute)
    assert output_messages == expected_messages
| 253 | + |
| 254 | + |
123 | 255 | @pytest.mark.vcr() |
124 | 256 | def test_multimodal_conversation_call_basic( |
125 | 257 | instrument_with_content, span_exporter |
|
0 commit comments