Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- Capture `gen_ai.skill.name`, `gen_ai.skill.id`, `gen_ai.skill.description`
and `gen_ai.skill.version` on the `execute_tool` span of the built-in
`Skill` tool. Skill metadata is read best-effort from the project-level
`SKILL.md` frontmatter (located via `SystemMessage.data.cwd`); `skill.id`
is reported as `claude:project:<skill-name>`. Metadata read failures never
affect the SDK call.

## Version 0.6.0 (2026-06-03)

There are no changelog entries for this release.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
"""Patch functions for Claude Agent SDK instrumentation."""

import logging
import os
import time
from typing import Any, Dict, List, Optional

import yaml

from opentelemetry import context as otel_context
from opentelemetry.instrumentation.claude_agent_sdk.utils import (
extract_usage_from_result_message,
Expand Down Expand Up @@ -86,6 +89,115 @@ def _clear_client_managed_runs() -> None:
_client_managed_runs.clear()


# The name of the Claude Agent SDK built-in tool that loads a Skill.
_SKILL_TOOL_NAME = "Skill"

# skill id prefix for project-scoped Claude Agent SDK skills.
_SKILL_ID_PREFIX = "claude:project:"


def _read_skill_metadata(skill_md_path: str) -> Dict[str, str]:
"""Best-effort read of a Skill's SKILL.md frontmatter.

Returns a dict with any of ``name``/``description``/``version`` keys that
were present in the YAML frontmatter. On any error (missing file, parse
failure, ...) returns an empty dict so telemetry never breaks the SDK call.
"""
try:
with open(skill_md_path, "r", encoding="utf-8") as f:
content = f.read()
except Exception:
# Missing or unreadable SKILL.md is expected for non-project skills.
return {}

return _parse_skill_frontmatter(content)


def _parse_skill_frontmatter(content: str) -> Dict[str, str]:
"""Parse the YAML frontmatter (``---`` delimited) of a SKILL.md body."""
try:
stripped = content.lstrip()
if not stripped.startswith("---"):
return {}
# Split off the leading ``---``; the next ``---`` closes the block.
after_open = stripped[3:]
end_index = after_open.find("\n---")
if end_index == -1:
# Frontmatter never closed; treat the remainder as the block.
frontmatter_text = after_open
else:
frontmatter_text = after_open[:end_index]

parsed = yaml.safe_load(frontmatter_text)
if not isinstance(parsed, dict):
return {}
except Exception:
return {}

metadata: Dict[str, str] = {}
for key in ("name", "description", "version"):
value = parsed.get(key)
if value is not None:
metadata[key] = str(value)
return metadata


def _apply_skill_metadata(
    tool_invocation: ExecuteToolInvocation,
    skill_name: str,
    cwd: Optional[str],
) -> None:
    """Attach ``gen_ai.skill.*`` attributes to a Skill load tool span.

    Reads the project-level ``SKILL.md`` frontmatter best-effort and fills in
    ``skill_name``/``skill_id``/``skill_description``/``skill_version`` on the
    invocation.

    Args:
        tool_invocation: Invocation object whose ``skill_*`` attributes are
            set in place.
        skill_name: Skill name requested in the tool-call input. When empty,
            nothing is set.
        cwd: Project working directory used to locate
            ``<cwd>/.claude/skills/<skill_name>/SKILL.md``. When falsy, no
            file is read and only name/id are set from ``skill_name``.
    """
    if not skill_name:
        return

    # ``skill_name`` originates from the tool-call input. ``os.path.join``
    # discards earlier components when given an absolute one and happily
    # follows ``..``, so only build a path from names that are a single
    # plain path component; anything else cannot be a project skill
    # directory and must not steer the read outside ``.claude/skills``.
    is_plain_component = (
        skill_name not in (os.curdir, os.pardir)
        and os.path.basename(skill_name) == skill_name
    )

    metadata: Dict[str, str] = {}
    if cwd and is_plain_component:
        skill_md_path = os.path.join(
            cwd, ".claude", "skills", skill_name, "SKILL.md"
        )
        metadata = _read_skill_metadata(skill_md_path)

    # gen_ai.skill.name: prefer frontmatter, fall back to the requested name.
    name = metadata.get("name") or skill_name
    tool_invocation.skill_name = name
    tool_invocation.skill_id = f"{_SKILL_ID_PREFIX}{name}"

    # Optional attributes are only set when the frontmatter provides them.
    description = metadata.get("description")
    if description:
        tool_invocation.skill_description = description
    version = metadata.get("version")
    if version:
        tool_invocation.skill_version = version


def _apply_skill_fallback(

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] _apply_skill_fallback is not exercised by any test.

In both new tests, skill_name is captured at span start from tool_input.get("skill"), so this function returns early at if tool_invocation.skill_name: return. The tool_use_result.commandName path below is never hit.

Also, create_mock_message_from_data in test_with_cassettes.py and test_span_validation.py does not propagate tool_use_result onto the mock UserMessage, so the cassette cannot exercise this path either.

Suggest adding a case where tool_input omits skill (or sets it empty) and tool_use_result.commandName provides the fallback, asserting skill_name/skill_id are recovered from the result.

tool_invocation: ExecuteToolInvocation,
tool_use_result: Any,
) -> None:
"""Best-effort fallback to recover skill_name before closing a Skill span.

If ``skill_name`` was not captured at span start (e.g. cwd was unavailable
so SKILL.md could not be read), try ``UserMessage.tool_use_result.commandName``
per the SDK's Skill tool result format.
"""
if tool_invocation.skill_name:
return
if not isinstance(tool_use_result, dict):
return
command_name = tool_use_result.get("commandName")

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] Optional enhancement: after recovering skill_name from commandName, re-read SKILL.md via _read_skill_metadata so description/version can still be populated on the fallback path.

Per spec this is acceptable (name/id only on fallback), so feel free to leave as-is — just flagging the option.

if command_name:
tool_invocation.skill_name = str(command_name)
tool_invocation.skill_id = (

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] skill_id does not use str(command_name).

skill_name = str(command_name) on the previous line, but skill_id = f"{_SKILL_ID_PREFIX}{command_name}". The f-string formats the value via format(), which falls back to __str__ for types that do not override __format__, so behavior is identical in practice; for consistency with the previous line, prefer f"{_SKILL_ID_PREFIX}{str(command_name)}".

f"{_SKILL_ID_PREFIX}{command_name}"
)


def _extract_message_parts(msg: Any) -> List[Any]:
"""Extract parts (text + tool calls) from an AssistantMessage."""
parts = []
Expand Down Expand Up @@ -113,12 +225,17 @@ def _create_tool_spans_from_message(
agent_invocation: InvokeAgentInvocation,
active_task_stack: List[Any],
exclude_tool_names: Optional[List[str]] = None,
cwd: Optional[str] = None,
) -> None:
"""Create tool execution spans from ToolUseBlocks in an AssistantMessage.

Tool spans are children of the active SubAgent span (if any), otherwise agent span.
When a Task tool is created, it's pushed onto active_task_stack along with a SubAgent span.

For the built-in ``Skill`` tool, ``gen_ai.skill.*`` attributes are read
best-effort from the project-level ``SKILL.md`` frontmatter (located via
``cwd``) and attached to the tool span.

The stack structure is: [{"task": ExecuteToolInvocation, "subagent": InvokeAgentInvocation}, ...]
"""
if not hasattr(msg, "content"):
Expand Down Expand Up @@ -163,6 +280,26 @@ def _create_tool_spans_from_message(
tool_call_arguments=tool_input,
tool_description=tool_name,
)

# Skill load: attach gen_ai.skill.* attributes best-effort
# from the project SKILL.md frontmatter. Failures here must
# never propagate to break the SDK call.
if tool_name == _SKILL_TOOL_NAME:
try:
skill_name = ""
if isinstance(tool_input, dict):
skill_name = str(
tool_input.get("skill") or ""
)
_apply_skill_metadata(
tool_invocation, skill_name, cwd
)
except Exception as e:
logger.warning(
f"Failed to read Skill metadata for "
f"'{tool_input}': {e}"
)

handler.start_execute_tool(tool_invocation)
_client_managed_runs[tool_use_id] = tool_invocation

Expand Down Expand Up @@ -271,6 +408,7 @@ def _process_assistant_message(
handler: ExtendedTelemetryHandler,
collected_messages: List[Dict[str, Any]],
active_task_stack: List[Any],
cwd: Optional[str] = None,
) -> None:
"""Process AssistantMessage: create LLM turn, extract parts, create tool spans."""
parts = _extract_message_parts(msg)
Expand Down Expand Up @@ -353,7 +491,7 @@ def _process_assistant_message(
turn_tracker.close_llm_turn()

_create_tool_spans_from_message(
msg, handler, agent_invocation, active_task_stack
msg, handler, agent_invocation, active_task_stack, cwd=cwd
)


Expand Down Expand Up @@ -474,6 +612,18 @@ def _process_user_message(
Error(message=error_msg, type=RuntimeError),
)
else:
# Skill load: best-effort fallback to fill skill_name
# from the tool result if it wasn't captured at start.
if tool_invocation.tool_name == _SKILL_TOOL_NAME:
try:
_apply_skill_fallback(
tool_invocation, tool_use_result
)
except Exception as e:
logger.warning(
f"Failed to apply Skill metadata "
f"fallback: {e}"
)
handler.stop_execute_tool(tool_invocation)

if tool_use_id:
Expand Down Expand Up @@ -522,18 +672,23 @@ def _process_user_message(
def _process_system_message(
msg: Any,
agent_invocation: InvokeAgentInvocation,
) -> None:
"""Process SystemMessage: extract session_id early in the stream.
) -> Optional[str]:
"""Process SystemMessage: extract session_id and cwd early in the stream.

SystemMessage appears at the beginning of the message stream and contains
the session_id in its data field. We extract it here so that it's available
for all subsequent LLM spans.
the session_id and cwd in its data field. We extract them here so they are
available for all subsequent spans (cwd is needed to locate project-level
SKILL.md files for Skill tool telemetry).

Returns the cwd if present, otherwise ``None``.
"""
if hasattr(msg, "subtype") and msg.subtype == "init":
if hasattr(msg, "data") and isinstance(msg.data, dict):
session_id = msg.data.get("session_id")
if session_id:
agent_invocation.conversation_id = session_id
return msg.data.get("cwd")
return None


def _process_result_message(
Expand Down Expand Up @@ -590,12 +745,16 @@ async def _process_agent_invocation_stream(
# When its ToolResultBlock is received, it's popped
active_task_stack: List[Any] = []

# cwd captured from SystemMessage.data.cwd, used to locate project-level
# SKILL.md files for Skill tool telemetry.
session_cwd: Optional[str] = None

try:
async for msg in wrapped_stream:
msg_type = type(msg).__name__

if msg_type == "SystemMessage":
_process_system_message(msg, agent_invocation)
session_cwd = _process_system_message(msg, agent_invocation)
elif msg_type == "AssistantMessage":
_process_assistant_message(
msg,
Expand All @@ -606,6 +765,7 @@ async def _process_agent_invocation_stream(
handler,
collected_messages,
active_task_stack,
cwd=session_cwd,
)
elif msg_type == "UserMessage":
_process_user_message(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
description: 'Skill load: project-level probe-skill loaded via Skill tool'
prompt: Use the probe-skill Skill tool first. Then answer exactly PROBE_SKILL_MARKER and nothing else.
messages:
- type: SystemMessage
subtype: init
data:
type: system
subtype: init
cwd: __SKILL_CWD__

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nit] cwd: __SKILL_CWD__ is an unsubstituted placeholder.

No substitution logic exists, so at runtime _apply_skill_metadata looks for __SKILL_CWD__/.claude/skills/probe-skill/SKILL.md as a relative path, does not find it, and falls back to the requested skill name. The cassette therefore serves only as a smoke test; it does not exercise the frontmatter parse path.

Either have the cassette support a real tmp-path substitution (e.g. a fixture that rewrites __SKILL_CWD__ to tmp_path) or document it as smoke-only.

session_id: skill-session-0001
tools:
- Skill
- Bash
- Read
skills:
- probe-skill
mcp_servers: []
model: qwen-plus
permissionMode: bypassPermissions
apiKeySource: ANTHROPIC_API_KEY
claude_code_version: 2.1.1
output_style: default
agents: []
slash_commands: []
plugins: []
uuid: skill-init-uuid
- type: AssistantMessage
model: qwen-plus
content:
- type: ToolUseBlock
id: call_skill_load_probe
name: Skill
input:
skill: probe-skill
parent_tool_use_id: null
error: null
- type: UserMessage
content:
- type: ToolResultBlock
tool_use_id: call_skill_load_probe
content: 'Launching skill: probe-skill'
is_error: false
uuid: skill-result-uuid
parent_tool_use_id: null
tool_use_result:
success: true
commandName: probe-skill
- type: AssistantMessage
model: qwen-plus
content:
- type: TextBlock
text: PROBE_SKILL_MARKER
parent_tool_use_id: null
error: null
- type: ResultMessage
subtype: success
duration_ms: 3210
duration_api_ms: 9000
is_error: false
num_turns: 2
session_id: skill-session-0001
total_cost_usd: 0.012
usage:
input_tokens: 1024
cache_creation_input_tokens: 0
cache_read_input_tokens: 0
output_tokens: 32
server_tool_use:
web_search_requests: 0
web_fetch_requests: 0
service_tier: standard
cache_creation:
ephemeral_1h_input_tokens: 0
ephemeral_5m_input_tokens: 0
result: PROBE_SKILL_MARKER
structured_output: null
Loading