braintrust-sdk-python/py/src/braintrust/integrations/anthropic/_utils.py at main · braintrustdata/braintrust-sdk-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""Shared utilities for Anthropic API wrappers."""

from typing import Any

from braintrust.integrations.utils import _try_to_dict as _shared_try_to_dict
from braintrust.util import is_numeric


class Wrapper:
    """Base wrapper that delegates unknown attribute lookups to a wrapped object.

    The wrapped object is stored in a name-mangled private attribute
    (``_Wrapper__wrapped``). Any attribute that normal lookup does not find
    on the wrapper itself is fetched from the wrapped object instead.
    """

    def __init__(self, wrapped: Any):
        """Store *wrapped* as the delegation target."""
        self.__wrapped = wrapped

    def __getattr__(self, name: str) -> Any:
        """Return attribute *name* from the wrapped object.

        Python only calls this hook after normal attribute lookup on the
        wrapper has failed.

        Raises:
            AttributeError: If the wrapped object has no attribute *name*, or
                if the wrapper has no wrapped object yet (``__init__`` has
                not run).
        """
        # If the private slot itself is missing (instance created through
        # ``__new__``, e.g. by copy or unpickling), reading ``self.__wrapped``
        # below would re-enter this method forever and end in RecursionError,
        # which ``hasattr``/``getattr(..., default)`` do not catch. Fail with
        # the conventional AttributeError instead.
        if name == "_Wrapper__wrapped":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self.__wrapped, name)


# (usage field name, metric name) pairs. ``extract_anthropic_usage`` reads each
# usage field and, when its value is numeric, stores it under the metric name.
_ANTHROPIC_USAGE_METRIC_FIELDS = (
    ("input_tokens", "prompt_tokens"),
    ("output_tokens", "completion_tokens"),
    ("cache_read_input_tokens", "prompt_cached_tokens"),
    ("cache_creation_input_tokens", "prompt_cache_creation_tokens"),
)

# (field name, metric name) pairs looked up inside the nested
# ``cache_creation`` entry of the usage payload. Their numeric values are also
# summed as a fallback for ``prompt_cache_creation_tokens`` when that metric
# was not set from the top-level usage fields.
_ANTHROPIC_CACHE_CREATION_METRIC_FIELDS = (
    ("ephemeral_5m_input_tokens", "prompt_cache_creation_5m_tokens"),
    ("ephemeral_1h_input_tokens", "prompt_cache_creation_1h_tokens"),
)

# Usage fields copied into metadata (under a ``usage_`` prefix) by
# ``extract_anthropic_usage`` whenever their value is not None.
_ANTHROPIC_USAGE_METADATA_FIELDS = frozenset(
    {
        "service_tier",
        "inference_geo",
    }
)


def _try_to_dict(obj: Any) -> dict[str, Any] | None:
    """Convert *obj* to a dict, or return ``None`` when that is not possible.

    The shared ``_try_to_dict`` helper does the conversion. Whatever it
    returns is passed through only if it is a ``dict``; any other result is
    replaced with ``None``.
    """
    converted = _shared_try_to_dict(obj)
    return converted if isinstance(converted, dict) else None


def _set_numeric_metric(metrics: dict[str, float], name: str, value: Any) -> None:
    """Store ``float(value)`` in *metrics* under *name* if *value* is numeric.

    Values rejected by ``is_numeric`` are ignored and *metrics* is left
    unchanged.
    """
    if not is_numeric(value):
        return
    metrics[name] = float(value)


def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any]]:
    """Split an Anthropic usage payload into metrics and metadata.

    Returns a ``(metrics, metadata)`` pair. Numeric usage fields become float
    metrics under their mapped names; allowlisted fields whose value is not
    ``None`` are copied into metadata with a ``usage_`` prefix. Both dicts
    are empty when *usage* cannot be converted to a dict.
    """
    record = _try_to_dict(usage)
    if record is None:
        return {}, {}

    metrics: dict[str, float] = {}

    # Top-level token counters.
    for field, metric in _ANTHROPIC_USAGE_METRIC_FIELDS:
        _set_numeric_metric(metrics, metric, record.get(field))

    # Per-TTL cache-creation counters from the nested ``cache_creation`` entry.
    ephemeral_parts: list[float] = []
    cache_creation = _try_to_dict(record.get("cache_creation"))
    if cache_creation is not None:
        for field, metric in _ANTHROPIC_CACHE_CREATION_METRIC_FIELDS:
            raw = cache_creation.get(field)
            if not is_numeric(raw):
                continue
            amount = float(raw)
            metrics[metric] = amount
            ephemeral_parts.append(amount)

    # Every numeric entry of ``server_tool_use`` becomes its own metric.
    server_tool_use = _try_to_dict(record.get("server_tool_use"))
    for tool_field, count in (server_tool_use or {}).items():
        _set_numeric_metric(metrics, f"server_tool_use_{tool_field}", count)

    # Fall back to the sum of the breakdown when no aggregate was provided.
    if ephemeral_parts and "prompt_cache_creation_tokens" not in metrics:
        metrics["prompt_cache_creation_tokens"] = sum(ephemeral_parts)

    if metrics:
        # ``prompt_tokens`` is rewritten to include cache reads and cache
        # writes on top of the value taken from ``input_tokens``.
        uncached = metrics.get("prompt_tokens", 0)
        cache_reads = metrics.get("prompt_cached_tokens", 0)
        cache_writes = metrics.get("prompt_cache_creation_tokens", 0)
        prompt_total = uncached + cache_reads + cache_writes
        metrics["prompt_tokens"] = prompt_total
        metrics["tokens"] = prompt_total + metrics.get("completion_tokens", 0)

    metadata: dict[str, Any] = {
        f"usage_{key}": val
        for key, val in record.items()
        if key in _ANTHROPIC_USAGE_METADATA_FIELDS and val is not None
    }
    return metrics, metadata