Skip to content

Commit da70a16

Browse files
Merge remote-tracking branch 'origin/main' into feat/max_steps
2 parents f3183f9 + 77ef748 commit da70a16

15 files changed

Lines changed: 435 additions & 40 deletions

docs/05_bring_your_own_model_provider.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,46 @@ class MyImageQAProvider(ImageQAProvider):
135135
```
136136

137137

138+
### Execution Cost Tracking
139+
140+
The built-in VLM providers include default pricing for supported models. You can override the pricing on any provider by passing both `input_cost_per_million_tokens` and `output_cost_per_million_tokens` (cost is only calculated when both rates are set):
141+
142+
```python
143+
from askui import AgentSettings, ComputerAgent
144+
from askui.model_providers import AnthropicVlmProvider
145+
from askui.reporting import SimpleHtmlReporter
146+
147+
with ComputerAgent(
148+
reporters=[SimpleHtmlReporter()],
149+
settings=AgentSettings(
150+
vlm_provider=AnthropicVlmProvider(
151+
model_id="claude-sonnet-4-6",
152+
input_cost_per_million_tokens=3.0,
153+
output_cost_per_million_tokens=15.0,
154+
),
155+
),
156+
) as agent:
157+
agent.act("Open settings")
158+
```
159+
160+
If you implement a fully custom `VlmProvider`, override the `pricing` property to enable cost tracking (by default it returns `None`, and no cost data is included in the usage summary):
161+
162+
```python
163+
from askui.model_providers import VlmProvider, ModelPricing
164+
165+
class MyVlmProvider(VlmProvider):
166+
@property
167+
def pricing(self) -> ModelPricing | None:
168+
return ModelPricing(
169+
input_cost_per_million_tokens=1.0,
170+
output_cost_per_million_tokens=5.0,
171+
)
172+
173+
# ... rest of implementation
174+
```
175+
176+
---
177+
138178
## Advanced: Injecting a Custom Client
139179

140180
For full control over HTTP settings (timeouts, proxies, retries), you can inject a pre-configured client:

docs/08_reporting.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ This generates an HTML file (typically in the current directory) showing:
3232
SimpleHtmlReporter(output_dir="./execution_reports", filename="agent_run.html")
3333
```
3434

35+
### Execution Cost Tracking
36+
37+
The HTML report automatically shows the estimated API cost when using a `VlmProvider` with pricing information. The built-in Anthropic and AskUI providers include default pricing for supported Claude models.
38+
39+
The report will display:
40+
- Total estimated cost
41+
- Per-token rates used for the calculation
42+
- Input and output token breakdowns (as before)
43+
3544
### Custom Reporters
3645

3746
Create custom reporters by implementing the `Reporter` interface:

src/askui/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""AskUI Python SDK"""
22

3-
__version__ = "0.25.1"
3+
__version__ = "0.26.1"
44

55
import logging
66
import os

src/askui/agent_base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ def __init__(
7474
# Create conversation with speakers and model providers
7575
speakers = Speakers()
7676
_callbacks = list(callbacks or [])
77-
_callbacks.append(UsageTrackingCallback(reporter=self._reporter))
77+
_callbacks.append(
78+
UsageTrackingCallback(
79+
reporter=self._reporter,
80+
pricing=self._vlm_provider.pricing,
81+
)
82+
)
7883
self._conversation = Conversation(
7984
speakers=speakers,
8085
vlm_provider=self._vlm_provider,

src/askui/callbacks/usage_tracking_callback.py

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,31 +5,69 @@
55
from typing import TYPE_CHECKING
66

77
from opentelemetry import trace
8+
from pydantic import BaseModel
89
from typing_extensions import override
910

1011
from askui.callbacks.conversation_callback import ConversationCallback
11-
from askui.models.shared.agent_message_param import UsageParam
12-
from askui.reporting import NULL_REPORTER, Reporter
12+
from askui.reporting import NULL_REPORTER
1313

1414
if TYPE_CHECKING:
15+
from askui.models.shared.agent_message_param import UsageParam
1516
from askui.models.shared.conversation import Conversation
17+
from askui.reporting import Reporter
1618
from askui.speaker.speaker import SpeakerResult
19+
from askui.utils.model_pricing import ModelPricing
20+
21+
22+
class UsageSummary(BaseModel):
23+
"""Accumulated token usage and optional cost breakdown for a conversation.
24+
25+
Args:
26+
input_tokens (int | None): Total input tokens sent to the API.
27+
output_tokens (int | None): Total output tokens generated.
28+
cache_creation_input_tokens (int | None): Tokens used for cache creation.
29+
cache_read_input_tokens (int | None): Tokens read from cache.
30+
input_cost (float | None): Computed input cost in `currency`.
31+
output_cost (float | None): Computed output cost in `currency`.
32+
total_cost (float | None): Sum of `input_cost` and `output_cost`.
33+
currency (str | None): ISO 4217 currency code (e.g. ``"USD"``).
34+
input_cost_per_million_tokens (float | None): Rate used to compute `input_cost`.
35+
output_cost_per_million_tokens (float|None): Rate used to compute `output_cost`.
36+
"""
37+
38+
input_tokens: int | None = None
39+
output_tokens: int | None = None
40+
cache_creation_input_tokens: int | None = None
41+
cache_read_input_tokens: int | None = None
42+
input_cost: float | None = None
43+
output_cost: float | None = None
44+
total_cost: float | None = None
45+
currency: str | None = None
46+
input_cost_per_million_tokens: float | None = None
47+
output_cost_per_million_tokens: float | None = None
1748

1849

1950
class UsageTrackingCallback(ConversationCallback):
2051
"""Tracks token usage per step and reports a summary at conversation end.
2152
2253
Args:
2354
reporter: Reporter to write the final usage summary to.
55+
pricing: Pricing information for cost calculation. If ``None``,
56+
no cost data is included in the usage summary.
2457
"""
2558

26-
def __init__(self, reporter: Reporter = NULL_REPORTER) -> None:
59+
def __init__(
60+
self,
61+
reporter: Reporter = NULL_REPORTER,
62+
pricing: ModelPricing | None = None,
63+
) -> None:
2764
self._reporter = reporter
28-
self._accumulated_usage = UsageParam()
65+
self._pricing = pricing
66+
self._summary = UsageSummary()
2967

3068
@override
3169
def on_conversation_start(self, conversation: Conversation) -> None:
32-
self._accumulated_usage = UsageParam()
70+
self._summary = UsageSummary()
3371

3472
@override
3573
def on_step_end(
@@ -43,27 +81,29 @@ def on_step_end(
4381

4482
@override
4583
def on_conversation_end(self, conversation: Conversation) -> None:
46-
self._reporter.add_usage_summary(self._accumulated_usage.model_dump())
84+
self._reporter.add_usage_summary(self._summary)
4785

4886
@property
49-
def accumulated_usage(self) -> UsageParam:
87+
def accumulated_usage(self) -> UsageSummary:
5088
"""Current accumulated usage statistics."""
51-
return self._accumulated_usage
89+
return self._summary
5290

5391
def _accumulate(self, step_usage: UsageParam) -> None:
54-
self._accumulated_usage.input_tokens = (
55-
self._accumulated_usage.input_tokens or 0
56-
) + (step_usage.input_tokens or 0)
57-
self._accumulated_usage.output_tokens = (
58-
self._accumulated_usage.output_tokens or 0
59-
) + (step_usage.output_tokens or 0)
60-
self._accumulated_usage.cache_creation_input_tokens = (
61-
self._accumulated_usage.cache_creation_input_tokens or 0
92+
# Add step tokens to running totals (None counts as 0)
93+
self._summary.input_tokens = (self._summary.input_tokens or 0) + (
94+
step_usage.input_tokens or 0
95+
)
96+
self._summary.output_tokens = (self._summary.output_tokens or 0) + (
97+
step_usage.output_tokens or 0
98+
)
99+
self._summary.cache_creation_input_tokens = (
100+
self._summary.cache_creation_input_tokens or 0
62101
) + (step_usage.cache_creation_input_tokens or 0)
63-
self._accumulated_usage.cache_read_input_tokens = (
64-
self._accumulated_usage.cache_read_input_tokens or 0
102+
self._summary.cache_read_input_tokens = (
103+
self._summary.cache_read_input_tokens or 0
65104
) + (step_usage.cache_read_input_tokens or 0)
66105

106+
# Record per-step token counts on the current OTel span
67107
current_span = trace.get_current_span()
68108
current_span.set_attributes(
69109
{
@@ -75,3 +115,32 @@ def _accumulate(self, step_usage: UsageParam) -> None:
75115
"cache_read_input_tokens": (step_usage.cache_read_input_tokens or 0),
76116
}
77117
)
118+
119+
# Update costs from updated totals if pricing values are set
120+
if not (
121+
self._pricing
122+
and self._pricing.input_cost_per_million_tokens
123+
and self._pricing.output_cost_per_million_tokens
124+
):
125+
return
126+
127+
input_cost = (
128+
self._summary.input_tokens
129+
* self._pricing.input_cost_per_million_tokens
130+
/ 1e6
131+
)
132+
output_cost = (
133+
self._summary.output_tokens
134+
* self._pricing.output_cost_per_million_tokens
135+
/ 1e6
136+
)
137+
self._summary.input_cost = input_cost
138+
self._summary.output_cost = output_cost
139+
self._summary.total_cost = input_cost + output_cost
140+
self._summary.currency = self._pricing.currency
141+
self._summary.input_cost_per_million_tokens = (
142+
self._pricing.input_cost_per_million_tokens
143+
)
144+
self._summary.output_cost_per_million_tokens = (
145+
self._pricing.output_cost_per_million_tokens
146+
)

src/askui/model_providers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from askui.model_providers.google_image_qa_provider import GoogleImageQAProvider
2424
from askui.model_providers.image_qa_provider import ImageQAProvider
2525
from askui.model_providers.vlm_provider import VlmProvider
26+
from askui.utils.model_pricing import ModelPricing
2627

2728
__all__ = [
2829
"AnthropicImageQAProvider",
@@ -33,5 +34,6 @@
3334
"DetectionProvider",
3435
"GoogleImageQAProvider",
3536
"ImageQAProvider",
37+
"ModelPricing",
3638
"VlmProvider",
3739
]

src/askui/model_providers/anthropic_vlm_provider.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from askui.models.shared.prompts import SystemPrompt
1818
from askui.models.shared.tools import ToolCollection
19+
from askui.utils.model_pricing import ModelPricing
1920

2021
_DEFAULT_MODEL_ID = "claude-sonnet-4-6"
2122

@@ -38,6 +39,11 @@ class AnthropicVlmProvider(VlmProvider):
3839
`\"claude-sonnet-4-6\"`.
3940
client (Anthropic | None, optional): Pre-configured Anthropic client.
4041
If provided, other connection parameters are ignored.
42+
input_cost_per_million_tokens (float | None, optional): Override
43+
cost in USD per 1M input tokens. Both cost params must be set
44+
to override the built-in defaults.
45+
output_cost_per_million_tokens (float | None, optional): Override
46+
cost in USD per 1M output tokens.
4147
4248
Example:
4349
```python
@@ -60,6 +66,8 @@ def __init__(
6066
auth_token: str | None = None,
6167
model_id: str | None = None,
6268
client: Anthropic | None = None,
69+
input_cost_per_million_tokens: float | None = None,
70+
output_cost_per_million_tokens: float | None = None,
6371
) -> None:
6472
self._model_id_value = (
6573
model_id or os.environ.get("VLM_PROVIDER_MODEL_ID") or _DEFAULT_MODEL_ID
@@ -72,12 +80,22 @@ def __init__(
7280
base_url=base_url,
7381
auth_token=auth_token,
7482
)
83+
self._pricing = ModelPricing.for_model(
84+
self._model_id_value,
85+
input_cost_per_million_tokens=input_cost_per_million_tokens,
86+
output_cost_per_million_tokens=output_cost_per_million_tokens,
87+
)
7588

7689
@property
7790
@override
7891
def model_id(self) -> str:
7992
return self._model_id_value
8093

94+
@property
95+
@override
96+
def pricing(self) -> ModelPricing | None:
97+
return self._pricing
98+
8199
@cached_property
82100
def _messages_api(self) -> AnthropicMessagesApi:
83101
"""Lazily initialise the AnthropicMessagesApi on first use."""

src/askui/model_providers/askui_vlm_provider.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ class AskUIVlmProvider(VlmProvider):
3737
`"claude-sonnet-4-6"`.
3838
client (Anthropic | None, optional): Pre-configured Anthropic client.
3939
If provided, `workspace_id` and `token` are ignored.
40-
4140
Example:
4241
```python
4342
from askui import AgentSettings, ComputerAgent

src/askui/model_providers/vlm_provider.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111
from askui.models.shared.prompts import SystemPrompt
1212
from askui.models.shared.tools import ToolCollection
13+
from askui.utils.model_pricing import ModelPricing
1314

1415

1516
class VlmProvider(ABC):
@@ -43,6 +44,15 @@ class VlmProvider(ABC):
4344
def model_id(self) -> str:
4445
"""The model identifier used by this provider."""
4546

47+
@property
48+
def pricing(self) -> ModelPricing | None:
49+
"""Pricing information for this provider's model.
50+
51+
Returns ``None`` if no pricing information is available.
52+
Override in subclasses to provide model-specific pricing.
53+
"""
54+
return None
55+
4656
@abstractmethod
4757
def create_message(
4858
self,

src/askui/prompts/act_prompts.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
down/up to see everything before deciding something isn't available.
4747
* When using your function calls, they take a while to run and send back
4848
to you. Where possible/feasible, try to chain multiple of these calls
49-
all into one function calls request."""
49+
all into one function calls request.
50+
* If you need to execute a click, make sure to move the mouse to the correct
51+
position first!"""
5052

5153
ANDROID_CAPABILITIES = """You are an autonomous Android device control agent
5254
operating via ADB on a test device with full system access.

0 commit comments

Comments
 (0)