agentcontrol
diff --git a/‎evaluators/contrib/budget/README.md‎
Lines changed: 135 additions & 2 deletions b/‎evaluators/contrib/budget/README.md‎
Lines changed: 135 additions & 2 deletions
diff --git a/‎evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py‎
Lines changed: 7 additions & 1 deletion b/‎evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py‎
Lines changed: 40 additions & 32 deletions b/‎evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py‎
Lines changed: 40 additions & 32 deletions
@@ -1,3 +1,136 @@
-# Budget Evaluator
+# agent-control-evaluator-budget
 
-Cumulative LLM cost and token budget tracking for agent-control.
+Budget evaluator for agent-control that tracks cumulative LLM token and cost usage per scope and time window.
+
+## Install
+
+```bash
+pip install agent-control-evaluator-budget
+```
+
+## Quickstart
+
+```python
+from agent_control_evaluator_budget.budget import (
+    BudgetEvaluatorConfig,
+    BudgetLimitRule,
+    ModelPricing,
+)
+
+config = BudgetEvaluatorConfig(
+    budget_id="support-daily",
+    limits=[
+        BudgetLimitRule(
+            scope={"agent": "support"},
+            group_by="user_id",
+            window_seconds=86_400,
+            limit=500,
+            limit_unit="usd_cents",
+        ),
+        BudgetLimitRule(
+            scope={"agent": "support"},
+            group_by="user_id",
+            window_seconds=86_400,
+            limit=50_000,
+            limit_unit="tokens",
+        ),
+    ],
+    pricing={
+        "gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
+    },
+    model_path="model",
+    metadata_paths={
+        "agent": "metadata.agent",
+        "user_id": "metadata.user_id",
+    },
+    unknown_model_behavior="block",
+)
+```
+
+The evaluator reads token usage from standard fields such as `usage.input_tokens` and `usage.output_tokens`. Configure `token_path` only when your event shape uses a custom location.
+
+## Scope and group_by
+
+Each `BudgetLimitRule` has a static `scope` and an optional `group_by` field.
+
+`scope` filters which events a rule applies to. A rule with `scope={"agent": "support"}` only applies when extracted metadata contains `agent="support"`. An empty scope is global.
+
+`group_by` creates independent buckets per extracted metadata value. The common per-user pattern is:
+
+```python
+BudgetLimitRule(
+    scope={"agent": "support"},
+    group_by="user_id",
+    window_seconds=86_400,
+    limit=500,
+    limit_unit="usd_cents",
+)
+```
+
+With `metadata_paths={"user_id": "metadata.user_id"}`, each user gets a separate daily budget inside the support scope.
+
+## Budget pools
+
+`budget_id` identifies the accumulated budget pool.
+
+Evaluators with the same `budget_id` share accumulated spend and token totals across all evaluator instances that use the same store. Each evaluator still applies its own configured rules — the shared state is the bucket (the rolling sum), not the rule set. Evaluators with different `budget_id` values are fully isolated.
+
+Use stable names such as `support-daily`, `billing-global`, or `tenant-acme-monthly`. Avoid generating a new `budget_id` per request unless each request should have an isolated budget.
+
+## Pricing
+
+`ModelPricing` stores cost rates in cents per 1K tokens:
+
+```python
+ModelPricing(input_per_1k=0.04, output_per_1k=0.16)
+```
+
+`input_per_1k` is applied to input tokens. `output_per_1k` is applied to output tokens.
+
+Pricing is required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit pricing. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0.
+
+## Dual Ceiling Pattern
+
+Use two evaluators when cost and token ceilings need independent control records or different `budget_id` pools:
+
+```python
+cost_config = BudgetEvaluatorConfig(
+    budget_id="support-cost-daily",
+    limits=[
+        BudgetLimitRule(
+            scope={"agent": "support"},
+            group_by="user_id",
+            window_seconds=86_400,
+            limit=500,
+            limit_unit="usd_cents",
+        )
+    ],
+    pricing={
+        "gpt-4.1-mini": ModelPricing(input_per_1k=0.04, output_per_1k=0.16),
+    },
+    model_path="model",
+    metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
+)
+
+token_config = BudgetEvaluatorConfig(
+    budget_id="support-token-daily",
+    limits=[
+        BudgetLimitRule(
+            scope={"agent": "support"},
+            group_by="user_id",
+            window_seconds=86_400,
+            limit=50_000,
+            limit_unit="tokens",
+        )
+    ],
+    metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"},
+)
+```
+
+This pattern lets cost and token budgets reset, alert, and roll out independently. A single evaluator can also contain both rules when one shared pool and one control result are sufficient.
+
+## Limitations
+
+`InMemoryBudgetStore` is single-process only. State is lost on restart and is not shared across workers or pods.
+
+Use a distributed store for production deployments that run multiple processes, multiple workers, or multiple pods.
@@ -1,6 +1,10 @@
 """Budget evaluator for per-agent LLM cost and token tracking."""
 
-from agent_control_evaluator_budget.budget.config import BudgetEvaluatorConfig
+from agent_control_evaluator_budget.budget.config import (
+    BudgetEvaluatorConfig,
+    BudgetLimitRule,
+    ModelPricing,
+)
 from agent_control_evaluator_budget.budget.evaluator import BudgetEvaluator
 from agent_control_evaluator_budget.budget.memory_store import InMemoryBudgetStore
 from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore
@@ -12,7 +16,9 @@
 __all__ = [
     "BudgetEvaluator",
     "BudgetEvaluatorConfig",
+    "BudgetLimitRule",
     "BudgetSnapshot",
     "BudgetStore",
     "InMemoryBudgetStore",
+    "ModelPricing",
 ]
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from enum import Enum
+from typing import Literal
 
 from agent_control_evaluators._base import EvaluatorConfig
 from pydantic import Field, field_validator, model_validator
@@ -17,12 +17,11 @@
 WINDOW_MONTHLY = 2592000  # 30 days
 
 
-class Currency(str, Enum):
-    """Supported budget currencies."""
+class ModelPricing(EvaluatorConfig):
+    """Per-model token pricing in cents per 1K tokens."""
 
-    USD = "usd"
-    EUR = "eur"
-    TOKENS = "tokens"
+    input_per_1k: float = 0.0
+    output_per_1k: float = 0.0
 
 
 class BudgetLimitRule(EvaluatorConfig):
@@ -43,39 +42,24 @@ class BudgetLimitRule(EvaluatorConfig):
             each user gets their own budget. None = shared/global limit.
         window_seconds: Time window for accumulation in seconds.
             None = cumulative (no reset). See WINDOW_* constants.
-        limit: Maximum spend in the window, in minor units (e.g. cents
-            for USD). None = uncapped on this dimension.
-        currency: Currency for the limit. Defaults to USD.
-        limit_tokens: Maximum tokens in the window. None = uncapped.
+        limit: Positive integer cap on usage in the window, in limit_unit.
+        limit_unit: Unit for limit. usd_cents checks spend; tokens checks
+            input + output tokens.
     """
 
     scope: dict[str, str] = Field(default_factory=dict)
     group_by: str | None = None
     window_seconds: int | None = None
-    limit: int | None = None
-    currency: Currency = Currency.USD
-    limit_tokens: int | None = None
-
-    @model_validator(mode="after")
-    def at_least_one_limit(self) -> "BudgetLimitRule":
-        if self.limit is None and self.limit_tokens is None:
-            raise ValueError("At least one of limit or limit_tokens must be set")
-        return self
+    limit: int
+    limit_unit: Literal["usd_cents", "tokens"] = "usd_cents"
 
     @field_validator("limit")
     @classmethod
-    def validate_limit(cls, v: int | None) -> int | None:
-        if v is not None and v <= 0:
+    def validate_limit(cls, v: int) -> int:
+        if v <= 0:
             raise ValueError("limit must be a positive integer")
         return v
 
-    @field_validator("limit_tokens")
-    @classmethod
-    def validate_limit_tokens(cls, v: int | None) -> int | None:
-        if v is not None and v <= 0:
-            raise ValueError("limit_tokens must be positive")
-        return v
-
     @field_validator("window_seconds")
     @classmethod
     def validate_window_seconds(cls, v: int | None) -> int | None:
@@ -89,9 +73,13 @@ class BudgetEvaluatorConfig(EvaluatorConfig):
 
     Attributes:
         limits: List of budget limit rules. Each is checked independently.
-        pricing: Optional model pricing table. Maps model name to per-1K
-            token rates. Used to derive cost in USD from token counts and
-            model name.
+        budget_id: Budget pool identifier. Evaluators with the same budget_id
+            share accumulated usage; different budget_ids are fully isolated.
+        unknown_model_behavior: What to do when a model is not found in the
+            pricing table and a cost-based rule exists. block=fail closed,
+            warn=log warning and treat cost as 0.
+        pricing: Optional model pricing table. Maps model name to ModelPricing.
+            Used to derive cost in USD cents from token counts and model name.
         token_path: Dot-notation path to extract token usage from step
             data (e.g. "usage.total_tokens"). If None, looks for standard
             fields (input_tokens, output_tokens, total_tokens, usage).
@@ -101,7 +89,27 @@ class BudgetEvaluatorConfig(EvaluatorConfig):
     """
 
     limits: list[BudgetLimitRule] = Field(min_length=1)
-    pricing: dict[str, dict[str, float]] | None = None
+    budget_id: str = Field(
+        default="default",
+        description=(
+            "Unique budget pool identifier. Same budget_id shares accumulated spend. "
+            "Different budget_id is fully isolated."
+        ),
+    )
+    unknown_model_behavior: Literal["block", "warn"] = Field(
+        default="block",
+        description=(
+            "What to do when a model is not found in the pricing table and a cost-based "
+            "rule exists. block=fail closed, warn=log warning and treat cost as 0."
+        ),
+    )
+    pricing: dict[str, ModelPricing] | None = None
     token_path: str | None = None
     model_path: str | None = None
     metadata_paths: dict[str, str] = Field(default_factory=dict)
+
+    @model_validator(mode="after")
+    def require_pricing_for_cost_rules(self) -> "BudgetEvaluatorConfig":
+        if self.pricing is None and any(rule.limit_unit == "usd_cents" for rule in self.limits):
+            raise ValueError('pricing is required when any rule uses limit_unit="usd_cents"')
+        return self