Skip to content

Commit 8a17210

Browse files
committed
fix: capture dashscope multimodal media outputs
1 parent 75836bd commit 8a17210

3 files changed

Lines changed: 156 additions & 0 deletions

File tree

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
### Fixed
11+
12+
- Capture image and video URI outputs from `MultiModalConversation` responses.
13+
1014
## Version 0.6.0 (2026-06-03)
1115

1216
### Fixed

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/utils/multimodal.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,16 @@ def _extract_multimodal_output_messages(response: Any) -> List[OutputMessage]:
351351
type="text",
352352
)
353353
)
354+
# Image content
355+
elif "image" in item:
356+
parts.append(
357+
Uri(
358+
uri=item["image"],
359+
modality="image",
360+
mime_type=None,
361+
type="uri",
362+
)
363+
)
354364
# Audio content (when modalities includes "audio")
355365
elif "audio" in item:
356366
parts.append(
@@ -361,6 +371,16 @@ def _extract_multimodal_output_messages(response: Any) -> List[OutputMessage]:
361371
type="uri",
362372
)
363373
)
374+
# Video content
375+
elif "video" in item:
376+
parts.append(
377+
Uri(
378+
uri=item["video"],
379+
modality="video",
380+
mime_type=None,
381+
type="uri",
382+
)
383+
)
364384
elif isinstance(item, str):
365385
parts.append(Text(content=item, type="text"))
366386

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/tests/test_multimodal_conversation.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,56 @@
1414

1515
"""Tests for MultiModalConversation instrumentation."""
1616

17+
import json
18+
from types import SimpleNamespace
1719
from typing import Optional
1820

1921
import pytest
2022
from dashscope import MultiModalConversation
2123

24+
from opentelemetry.instrumentation._semconv import (
25+
OTEL_SEMCONV_STABILITY_OPT_IN,
26+
_OpenTelemetrySemanticConventionStability,
27+
)
28+
from opentelemetry.instrumentation.dashscope.utils.multimodal import (
29+
_extract_multimodal_output_messages,
30+
_update_invocation_from_multimodal_response,
31+
)
2232
from opentelemetry.semconv._incubating.attributes import (
2333
gen_ai_attributes as GenAIAttributes,
2434
)
35+
from opentelemetry.util.genai.environment_variables import (
36+
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
37+
)
38+
from opentelemetry.util.genai.handler import TelemetryHandler
39+
from opentelemetry.util.genai.types import LLMInvocation, Text, Uri
40+
41+
42+
def _make_multimodal_response(content, finish_reason="stop"):
43+
return SimpleNamespace(
44+
output=SimpleNamespace(
45+
choices=[
46+
SimpleNamespace(
47+
message=SimpleNamespace(content=content),
48+
finish_reason=finish_reason,
49+
)
50+
]
51+
)
52+
)
53+
54+
55+
@pytest.fixture(scope="function")
def content_capture_env(monkeypatch):
    """Opt in to experimental GenAI semconv with span-only content capture.

    Clears the semantic-convention stability "initialized" flag, sets both
    environment variables through ``monkeypatch``, and re-runs the stability
    initialisation. The flag is cleared again once the test has finished.
    """
    stability = _OpenTelemetrySemanticConventionStability
    env_overrides = (
        (OTEL_SEMCONV_STABILITY_OPT_IN, "gen_ai_latest_experimental"),
        (OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "SPAN_ONLY"),
    )

    stability._initialized = False
    for env_name, env_value in env_overrides:
        monkeypatch.setenv(env_name, env_value)
    stability._initialize()

    yield

    # Teardown: force a fresh initialisation the next time it is requested.
    stability._initialized = False
2567

2668

2769
def _safe_getattr(obj, attr, default=None):
@@ -120,6 +162,96 @@ def _assert_multimodal_span_attributes(
120162
)
121163

122164

165+
@pytest.mark.parametrize(
    ("content_key", "url", "modality"),
    [
        ("image", "https://example.com/a.png", "image"),
        ("audio", "https://example.com/a.wav", "audio"),
        ("video", "https://example.com/a.mp4", "video"),
    ],
)
def test_extract_multimodal_output_messages_with_uri_content(
    content_key, url, modality
):
    """Check that a single media entry is extracted as one ``Uri`` part."""
    response = _make_multimodal_response([{content_key: url}])

    extracted = _extract_multimodal_output_messages(response)

    # Exactly one assistant message carrying the finish reason.
    assert len(extracted) == 1
    message = extracted[0]
    assert message.role == "assistant"
    assert message.finish_reason == "stop"

    # That message holds exactly one part, a Uri with no MIME type.
    assert len(message.parts) == 1
    uri_part = message.parts[0]
    assert isinstance(uri_part, Uri)
    assert uri_part.uri == url
    assert uri_part.modality == modality
    assert uri_part.mime_type is None
    assert uri_part.type == "uri"
192+
193+
194+
def test_extract_multimodal_output_messages_with_text_and_image_content():
    """Check that mixed text and image content yields both parts in order."""
    image_url = "https://example.com/generated.png"
    response = _make_multimodal_response(
        [{"text": "ok"}, {"image": image_url}]
    )

    extracted = _extract_multimodal_output_messages(response)

    assert len(extracted) == 1
    message = extracted[0]
    assert message.role == "assistant"
    assert message.finish_reason == "stop"
    assert len(message.parts) == 2

    # First part: the text entry.
    first = message.parts[0]
    assert isinstance(first, Text)
    assert first.content == "ok"
    assert first.type == "text"

    # Second part: the image entry, exposed as a Uri.
    second = message.parts[1]
    assert isinstance(second, Uri)
    assert second.uri == image_url
    assert second.modality == "image"
    assert second.mime_type is None
    assert second.type == "uri"
217+
218+
219+
def test_multimodal_image_output_messages_written_to_span(
    content_capture_env, tracer_provider, span_exporter
):
    """Check that an image URI output lands in gen_ai.output.messages."""
    image_url = "https://example.com/generated.png"
    expected_messages = [
        {
            "role": "assistant",
            "parts": [
                {
                    "mime_type": None,
                    "modality": "image",
                    "uri": image_url,
                    "type": "uri",
                }
            ],
            "finish_reason": "stop",
        }
    ]

    # Populate the invocation from the fake response before emitting it.
    invocation = LLMInvocation(request_model="wan2.7-image")
    invocation.provider = "dashscope"
    _update_invocation_from_multimodal_response(
        invocation, _make_multimodal_response([{"image": image_url}])
    )

    handler = TelemetryHandler(tracer_provider=tracer_provider)
    handler.start_llm(invocation)
    handler.stop_llm(invocation)

    finished = span_exporter.get_finished_spans()
    assert len(finished) == 1
    raw_messages = finished[0].attributes[
        GenAIAttributes.GEN_AI_OUTPUT_MESSAGES
    ]
    assert json.loads(raw_messages) == expected_messages
253+
254+
123255
@pytest.mark.vcr()
124256
def test_multimodal_conversation_call_basic(
125257
instrument_with_content, span_exporter

0 commit comments

Comments
 (0)