Skip to content

Commit 4064298

Browse files
committed
feat: add cache token extraction and dashscope embeddings improvements
- dashscope: add _extract_cache_tokens() for cache_creation/cache_read tokens - dashscope: set server_address/port on embedding invocations - dashscope: update test to use text-embedding-v4 model - hermes-agent: extract cache tokens from usage.prompt_tokens_details - vita: extract cache tokens from OpenAI-compatible usage format These changes enable gen_ai.usage.cache_creation.input_tokens and gen_ai.usage.cache_read.input_tokens attributes for LLM plugins that interact with providers supporting prompt caching (e.g. Anthropic, OpenAI with cached prompts, DashScope with context caching). Change-Id: I6f0c4035561b43eb5e8ed132257a6438b2a323f1 Co-developed-by: Qoder <noreply@qoder.com>
1 parent fbbedf4 commit 4064298

6 files changed

Lines changed: 97 additions & 8 deletions

File tree

  • instrumentation-loongsuite

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/patch/embedding.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def wrap_text_embedding_call(wrapped, instance, args, kwargs, handler=None):
5252
# Create embedding invocation object
5353
invocation = EmbeddingInvocation(request_model=model)
5454
invocation.provider = "dashscope"
55+
invocation.server_address = "dashscope.aliyuncs.com"
56+
invocation.server_port = 443
5557

5658
# Extract parameters from kwargs or kwargs["parameters"] dict
5759
parameters = kwargs.get("parameters", {})

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/utils/common.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,47 @@ def _extract_usage(response: Any) -> tuple[Optional[int], Optional[int]]:
8888
return None, None
8989

9090

91+
def _extract_cache_tokens(response: Any) -> tuple[Optional[int], Optional[int]]:
92+
"""Extract cache token usage from DashScope response.
93+
94+
Args:
95+
response: DashScope response object
96+
97+
Returns:
98+
Tuple of (cache_creation_input_tokens, cache_read_input_tokens)
99+
"""
100+
if not response:
101+
return None, None
102+
103+
try:
104+
usage = getattr(response, "usage", None)
105+
if not usage:
106+
return None, None
107+
108+
# DashScope may report cache tokens in various fields
109+
cache_creation = (
110+
getattr(usage, "cache_creation_input_tokens", None)
111+
or getattr(usage, "cache_creation_tokens", None)
112+
)
113+
cache_read = (
114+
getattr(usage, "cache_read_input_tokens", None)
115+
or getattr(usage, "cache_read_tokens", None)
116+
or getattr(usage, "prompt_cache_hit_tokens", None)
117+
)
118+
119+
# Also check prompt_tokens_details (OpenAI-compatible format)
120+
prompt_details = getattr(usage, "prompt_tokens_details", None)
121+
if prompt_details and cache_read is None:
122+
cache_read = getattr(prompt_details, "cached_tokens", None)
123+
124+
return (
125+
cache_creation if cache_creation and cache_creation > 0 else None,
126+
cache_read if cache_read and cache_read > 0 else None,
127+
)
128+
except (KeyError, AttributeError):
129+
return None, None
130+
131+
91132
def _extract_task_id(task: Any) -> Optional[str]:
92133
"""Extract task_id from task parameter (can be str or Response object).
93134

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/src/opentelemetry/instrumentation/dashscope/utils/generation.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,14 @@ def _update_invocation_from_response(
570570
invocation.input_tokens = input_tokens
571571
invocation.output_tokens = output_tokens
572572

573+
# Extract cache token usage
574+
from ..utils.common import _extract_cache_tokens
575+
cache_creation, cache_read = _extract_cache_tokens(response)
576+
if cache_creation is not None:
577+
invocation.usage_cache_creation_input_tokens = cache_creation
578+
if cache_read is not None:
579+
invocation.usage_cache_read_input_tokens = cache_read
580+
573581
# Extract response model name (if available)
574582
response_model = _safe_get(response, "model")
575583
if response_model:

instrumentation-loongsuite/loongsuite-instrumentation-dashscope/tests/test_embedding.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def test_text_embedding_basic(instrument, span_exporter):
141141
"""Test basic text embedding call."""
142142

143143
response = TextEmbedding.call(
144-
model="text-embedding-v1", input="Hello, world!"
144+
model="text-embedding-v4", input="Hello, world!"
145145
)
146146

147147
assert response is not None
@@ -165,7 +165,7 @@ def test_text_embedding_basic(instrument, span_exporter):
165165
# Assert all span attributes
166166
_assert_embedding_span_attributes(
167167
span,
168-
request_model="text-embedding-v1",
168+
request_model="text-embedding-v4",
169169
response=response,
170170
input_tokens=input_tokens,
171171
)
@@ -178,7 +178,7 @@ def test_text_embedding_batch(instrument, span_exporter):
178178
"""Test text embedding with batch input."""
179179

180180
response = TextEmbedding.call(
181-
model="text-embedding-v1", input=["Hello", "World"]
181+
model="text-embedding-v4", input=["Hello", "World"]
182182
)
183183

184184
assert response is not None
@@ -202,7 +202,7 @@ def test_text_embedding_batch(instrument, span_exporter):
202202
# Assert all span attributes
203203
_assert_embedding_span_attributes(
204204
span,
205-
request_model="text-embedding-v1",
205+
request_model="text-embedding-v4",
206206
response=response,
207207
input_tokens=input_tokens,
208208
)
@@ -215,7 +215,7 @@ def test_text_embedding_with_text_type(instrument, span_exporter):
215215
"""Test text embedding with text_type parameter."""
216216

217217
response = TextEmbedding.call(
218-
model="text-embedding-v1",
218+
model="text-embedding-v4",
219219
input="What is machine learning?",
220220
text_type="query",
221221
)
@@ -241,7 +241,7 @@ def test_text_embedding_with_text_type(instrument, span_exporter):
241241
# Assert all span attributes
242242
_assert_embedding_span_attributes(
243243
span,
244-
request_model="text-embedding-v1",
244+
request_model="text-embedding-v4",
245245
response=response,
246246
input_tokens=input_tokens,
247247
)
@@ -254,7 +254,7 @@ def test_text_embedding_with_dimension(instrument, span_exporter):
254254
"""Test text embedding with dimension parameter."""
255255

256256
response = TextEmbedding.call(
257-
model="text-embedding-v1",
257+
model="text-embedding-v4",
258258
input="What is machine learning?",
259259
dimension=512,
260260
)
@@ -280,7 +280,7 @@ def test_text_embedding_with_dimension(instrument, span_exporter):
280280
# Assert all span attributes including dimension_count
281281
_assert_embedding_span_attributes(
282282
span,
283-
request_model="text-embedding-v1",
283+
request_model="text-embedding-v4",
284284
response=response,
285285
input_tokens=input_tokens,
286286
dimension_count=512, # Should be captured from request

instrumentation-loongsuite/loongsuite-instrumentation-hermes-agent/src/opentelemetry/instrumentation/hermes_agent/helpers.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,26 @@ def update_llm_invocation_from_response(
693693
if output_tokens > 0:
694694
invocation.output_tokens = output_tokens
695695

696+
# Extract cache token usage
697+
usage = getattr(response, "usage", None)
698+
if usage is not None:
699+
# OpenAI-compatible: prompt_tokens_details.cached_tokens
700+
prompt_details = getattr(usage, "prompt_tokens_details", None)
701+
if prompt_details is not None:
702+
cached = getattr(prompt_details, "cached_tokens", None)
703+
if cached and cached > 0:
704+
invocation.usage_cache_read_input_tokens = cached
705+
# Direct fields (some providers)
706+
cache_creation = (
707+
getattr(usage, "cache_creation_input_tokens", None)
708+
or getattr(usage, "cache_creation_tokens", None)
709+
)
710+
if cache_creation and cache_creation > 0:
711+
invocation.usage_cache_creation_input_tokens = cache_creation
712+
cache_read = getattr(usage, "cache_read_input_tokens", None)
713+
if cache_read and cache_read > 0 and not invocation.usage_cache_read_input_tokens:
714+
invocation.usage_cache_read_input_tokens = cache_read
715+
696716
return input_tokens, output_tokens, total_tokens
697717

698718

instrumentation-loongsuite/loongsuite-instrumentation-vita/src/opentelemetry/instrumentation/vita/patch.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,15 @@ def wrap_generate_next_message(
346346
if usage and isinstance(usage, dict):
347347
invocation.input_tokens = usage.get("prompt_tokens")
348348
invocation.output_tokens = usage.get("completion_tokens")
349+
# Cache tokens
350+
prompt_details = usage.get("prompt_tokens_details")
351+
if isinstance(prompt_details, dict):
352+
cached = prompt_details.get("cached_tokens")
353+
if cached and cached > 0:
354+
invocation.usage_cache_read_input_tokens = cached
355+
cache_creation = usage.get("cache_creation_input_tokens")
356+
if cache_creation and cache_creation > 0:
357+
invocation.usage_cache_creation_input_tokens = cache_creation
349358

350359
handler.stop_invoke_agent(invocation)
351360
return result
@@ -415,6 +424,15 @@ def wrap_generate(
415424
if usage and isinstance(usage, dict):
416425
invocation.input_tokens = usage.get("prompt_tokens")
417426
invocation.output_tokens = usage.get("completion_tokens")
427+
# Cache tokens (OpenAI-compatible format)
428+
prompt_details = usage.get("prompt_tokens_details")
429+
if isinstance(prompt_details, dict):
430+
cached = prompt_details.get("cached_tokens")
431+
if cached and cached > 0:
432+
invocation.usage_cache_read_input_tokens = cached
433+
cache_creation = usage.get("cache_creation_input_tokens")
434+
if cache_creation and cache_creation > 0:
435+
invocation.usage_cache_creation_input_tokens = cache_creation
418436

419437
handler.stop_llm(invocation)
420438
return result

0 commit comments

Comments
 (0)