Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion evaluators/contrib/budget/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ModelPricing(input_per_1k=0.04, output_per_1k=0.16)

`input_per_1k` is applied to input tokens. `output_per_1k` is applied to output tokens.

Pricing is required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit pricing. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0.
Pricing and `model_path` are required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit both. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0.

## Dual Ceiling Pattern

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ class BudgetEvaluatorConfig(EvaluatorConfig):
pricing table and a cost-based rule exists. block=fail closed,
warn=log warning and treat cost as 0.
pricing: Optional model pricing table. Maps model name to ModelPricing.
Used to derive cost in USD from token counts and model name.
Required when any rule uses limit_unit="usd_cents". Used to
derive cost in USD from token counts and model name.
token_path: Dot-notation path to extract token usage from step
data (e.g. "usage.total_tokens"). If None, looks for standard
fields (input_tokens, output_tokens, total_tokens, usage).
model_path: Dot-notation path to extract model name (for pricing lookup).
Required when any rule uses limit_unit="usd_cents".
metadata_paths: Mapping of metadata field name to dot-notation path
in step data. Used to extract scope dimensions (channel, user_id, etc).
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,23 @@ def _extract_tokens(data: Any, token_path: str | None) -> tuple[int, int]:
out = usage.get("output_tokens")
if out is None:
out = usage.get("completion_tokens")
inp_ok = isinstance(inp, int) and not isinstance(inp, bool)
out_ok = isinstance(out, int) and not isinstance(out, bool)
if inp_ok and out_ok:
return max(0, inp), max(0, out)
input_tokens = _extract_non_negative_int(inp)
output_tokens = _extract_non_negative_int(out)
if input_tokens is not None and output_tokens is not None:
return input_tokens, output_tokens
total = usage.get("total_tokens")
if isinstance(total, int) and not isinstance(total, bool) and total > 0:
return 0, max(0, total)
return 0, 0


def _extract_non_negative_int(value: Any) -> int | None:
"""Return a non-negative integer or None for invalid token values."""
if not isinstance(value, int) or isinstance(value, bool):
return None
return max(0, value)


def _estimate_cost(
model: str | None,
input_tokens: int,
Expand Down Expand Up @@ -196,9 +203,10 @@ async def evaluate(self, data: Any) -> EvaluatorResult:
input_tokens, output_tokens = _extract_tokens(data, self.config.token_path)

model: str | None = None
model_path_configured = bool(self.config.model_path)
if model_path_configured:
val = _extract_by_path(data, self.config.model_path)
model_path = self.config.model_path
model_path_configured = bool(model_path)
if model_path:
val = _extract_by_path(data, model_path)
if val is not None:
model = str(val)

Expand All @@ -220,9 +228,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult:
)
if has_matching_cost_rule:
if model is None:
block_reason = (
f"Model not found at path '{self.config.model_path}'"
)
block_reason = f"Model not found at path '{model_path}'"
else:
block_reason = f"Unknown model: {model}"
if self.config.unknown_model_behavior == "block":
Expand Down
27 changes: 27 additions & 0 deletions evaluators/contrib/budget/tests/budget/test_budget.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,33 @@ def test_extract_tokens_openai(self) -> None:
data = {"usage": {"prompt_tokens": 80, "completion_tokens": 40}}
assert _extract_tokens(data, None) == (80, 40)

def test_extract_tokens_falls_back_when_normalized_fields_are_none(self) -> None:
    """Legacy OpenAI fields are used when the normalized fields are None."""
    # Given: normalized fields present but unset, plus legacy OpenAI fields
    data = {
        "usage": {
            "input_tokens": None,
            "output_tokens": None,
            "prompt_tokens": 80,
            "completion_tokens": 40,
        }
    }

    # When/Then: fallback still uses the legacy fields
    assert _extract_tokens(data, None) == (80, 40)

def test_extract_tokens_falls_back_per_field(self) -> None:
    """Each token side falls back to its legacy field independently."""
    # Given: one normalized field missing, the other present
    data = {
        "usage": {
            "input_tokens": 100,
            "output_tokens": None,
            "completion_tokens": 40,
        }
    }

    # When/Then: fallback applies independently per token side
    assert _extract_tokens(data, None) == (100, 40)

def test_extract_tokens_none(self) -> None:
# Given: None data / Then: (0, 0)
assert _extract_tokens(None, None) == (0, 0)
Expand Down
Loading