Skip to content

Commit d2f2660

Browse files
authored
feat(metrics): token and cost tracking per task (#314)
## Summary
- Instrument ReactAgent to record per-call token usage and persist to workspace DB
- Add sync recording, task/workspace aggregation, CSV/JSON export to MetricsTracker
- Add batch/task/workspace query methods to TokenRepository
- Create headless `cf stats` CLI commands (tokens, costs, export)
- Normalize Anthropic model names for pricing lookups
- Handle unknown models gracefully (zero cost, no crash)

## Validation
- Review feedback: 4 critical/important items addressed (1 round)
- Demo: All 5 acceptance criteria verified via cf stats commands
- Tests: 57 new tests, 2283 v2 tests passing (0 regressions)
- CI: All checks green (Backend Tests, Code Quality, Security)
- Linting: Clean

Closes #314
1 parent 017d8be commit d2f2660

10 files changed

Lines changed: 1872 additions & 25 deletions

File tree

codeframe/cli/app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from codeframe.cli.env_commands import env_app
2929
from codeframe.cli.engines_commands import engines_app
3030
from codeframe.cli.hooks_commands import hooks_app
31+
from codeframe.cli.stats_commands import stats_app
3132

3233
# Load environment variables from .env files
3334
# Priority: workspace .env > home .env
@@ -4870,6 +4871,7 @@ def templates_apply(
48704871

48714872
app.add_typer(engines_app, name="engines")
48724873
app.add_typer(hooks_app, name="hooks")
4874+
app.add_typer(stats_app, name="stats")
48734875

48744876

48754877
# =============================================================================

codeframe/cli/stats_commands.py

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
"""CLI stats commands for headless token/cost tracking.
2+
3+
This module provides commands for viewing token usage and cost statistics
4+
directly from the local workspace database (no server required):
5+
6+
- tokens: View workspace token usage summary
7+
- costs: View cost report with optional period filtering
8+
- export: Export usage data to CSV or JSON
9+
10+
Usage:
11+
cf stats tokens # Workspace token summary
12+
cf stats tokens --task <id> # Per-task breakdown
13+
cf stats costs # All-time costs
14+
cf stats costs --period month # Last 30 days
15+
cf stats export --format csv --output tokens.csv
16+
"""
17+
18+
import logging
19+
from datetime import datetime, timedelta, timezone
20+
from pathlib import Path
21+
from typing import Optional
22+
23+
import typer
24+
from rich.table import Table
25+
26+
from codeframe.cli.helpers import console
27+
28+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Typer application object that the ``tokens``, ``costs`` and ``export``
# commands in this module register themselves on.
stats_app = typer.Typer(
    no_args_is_help=True,
    help="Token usage and cost statistics",
    name="stats",
)
35+
36+
37+
def _get_db():
    """Open the database belonging to the workspace in the current directory.

    The database file is expected at ``.codeframe/state.db``, resolved
    relative to the current working directory.

    Returns:
        A ``Database`` instance on which ``initialize()`` has been called.

    Raises:
        typer.Exit: With exit code 1 when the database file does not exist.
    """
    # Imported at call time rather than at module import.
    from codeframe.persistence.database import Database

    workspace_db = Path(".codeframe") / "state.db"
    if workspace_db.exists():
        database = Database(workspace_db)
        database.initialize()
        return database

    # No database file: report the problem and stop the command.
    console.print("[red]Error:[/red] No workspace found. Run 'cf init' first.")
    raise typer.Exit(1)
57+
58+
59+
def _get_tracker(db):
    """Build a ``MetricsTracker`` bound to the given database.

    Args:
        db: Database instance handed to the tracker as its ``db`` argument.

    Returns:
        The newly constructed ``MetricsTracker``.
    """
    # Imported at call time rather than at module import.
    from codeframe.lib.metrics_tracker import MetricsTracker

    tracker = MetricsTracker(db=db)
    return tracker
71+
72+
73+
def _format_number(n: int) -> str:
74+
"""Format number with thousands separator."""
75+
return f"{n:,}"
76+
77+
78+
@stats_app.command()
def tokens(
    task: Optional[int] = typer.Option(
        None, "--task", "-t", help="Filter by task ID for per-task breakdown"
    ),
):
    """Show workspace token usage summary.

    Displays total tokens used across all tasks, with input/output breakdown
    and per-model statistics. Use --task to filter to a specific task.

    Examples:
        cf stats tokens              # Workspace summary
        cf stats tokens --task 1     # Task 1 breakdown
    """
    # NOTE(review): the docstring above is presumably what Typer shows as the
    # command's --help text, so its wording is left untouched.
    db = _get_db()
    try:
        # The tracker is created before branching, whichever path is taken.
        tracker = _get_tracker(db)

        if task is None:
            # Workspace-wide view: aggregate every usage record in one pass.
            usage_rows = db.get_workspace_token_usage()

            input_sum = 0
            output_sum = 0
            cost_sum = 0.0
            per_model: dict[str, dict] = {}

            for row in usage_rows:
                input_sum += row["input_tokens"]
                output_sum += row["output_tokens"]
                cost_sum += row["estimated_cost_usd"]

                # First sighting of a model creates its zeroed bucket.
                bucket = per_model.setdefault(
                    row["model_name"],
                    {
                        "input_tokens": 0,
                        "output_tokens": 0,
                        "cost_usd": 0.0,
                        "calls": 0,
                    },
                )
                bucket["input_tokens"] += row["input_tokens"]
                bucket["output_tokens"] += row["output_tokens"]
                bucket["cost_usd"] += row["estimated_cost_usd"]
                bucket["calls"] += 1

            console.print("\n[bold]Workspace Token Usage Summary[/bold]\n")

            overview = Table(show_header=True)
            overview.add_column("Metric", style="cyan")
            overview.add_column("Value", justify="right")
            for label, value in (
                ("Total Tokens", _format_number(input_sum + output_sum)),
                ("Input Tokens", _format_number(input_sum)),
                ("Output Tokens", _format_number(output_sum)),
                ("Total Cost", f"${cost_sum:.4f}"),
                ("LLM Calls", str(len(usage_rows))),
            ):
                overview.add_row(label, value)
            console.print(overview)

            # The per-model table is only shown when there is at least one record.
            if per_model:
                console.print("\n[bold]By Model:[/bold]")
                breakdown = Table(show_header=True)
                breakdown.add_column("Model", style="cyan")
                for heading in ("Tokens", "Cost", "Calls"):
                    breakdown.add_column(heading, justify="right")

                for model_name, bucket in per_model.items():
                    combined = bucket["input_tokens"] + bucket["output_tokens"]
                    breakdown.add_row(
                        model_name,
                        _format_number(combined),
                        f"${bucket['cost_usd']:.4f}",
                        str(bucket["calls"]),
                    )

                console.print(breakdown)
        else:
            # Single-task view: the tracker supplies the aggregated figures.
            summary = tracker.get_task_token_summary(task)

            console.print(f"\n[bold]Token Usage for Task {task}[/bold]\n")

            task_table = Table(show_header=True, title=None)
            task_table.add_column("Metric", style="cyan")
            task_table.add_column("Value", justify="right")
            for label, value in (
                ("Total Tokens", _format_number(summary["total_tokens"])),
                ("Input Tokens", _format_number(summary["total_input_tokens"])),
                ("Output Tokens", _format_number(summary["total_output_tokens"])),
                ("Total Cost", f"${summary['total_cost_usd']:.4f}"),
                ("LLM Calls", str(summary["call_count"])),
            ):
                task_table.add_row(label, value)

            console.print(task_table)
    finally:
        # Always release the database handle, including on error paths.
        db.close()
176+
177+
178+
@stats_app.command()
def costs(
    period: Optional[str] = typer.Option(
        None,
        "--period",
        "-p",
        help="Time period: 'day' (24h), 'week' (7d), 'month' (30d)",
    ),
):
    """Show cost report.

    Displays total costs and per-model breakdown. Use --period to filter
    to a recent time window.

    Examples:
        cf stats costs                   # All-time costs
        cf stats costs --period month    # Last 30 days
        cf stats costs --period week     # Last 7 days
        cf stats costs --period day      # Last 24 hours
    """
    # Supported --period values and the look-back window each one selects.
    windows = {
        "day": timedelta(days=1),
        "week": timedelta(weeks=1),
        "month": timedelta(days=30),
    }

    # Reject a bad --period before the workspace database is opened and
    # initialised, so an invalid argument never triggers any database work.
    if period is not None and period not in windows:
        console.print(
            f"[red]Error:[/red] Unknown period '{period}'. Use 'day', 'week', or 'month'."
        )
        raise typer.Exit(1)

    db = _get_db()
    try:
        # No period means no lower bound: the report covers all records.
        start_date = None
        if period is not None:
            start_date = datetime.now(timezone.utc) - windows[period]

        # Single fetch: totals and the per-model breakdown come from one pass.
        records = db.get_workspace_token_usage(start_date=start_date, end_date=None)

        total_cost = 0.0
        total_tokens = 0
        model_costs: dict[str, dict] = {}
        for record in records:
            cost = record["estimated_cost_usd"]
            # Named so it does not shadow the sibling ``tokens`` command.
            record_tokens = record["input_tokens"] + record["output_tokens"]
            total_cost += cost
            total_tokens += record_tokens

            model = record["model_name"]
            if model not in model_costs:
                model_costs[model] = {"cost_usd": 0.0, "tokens": 0, "calls": 0}
            model_costs[model]["cost_usd"] += cost
            model_costs[model]["tokens"] += record_tokens
            model_costs[model]["calls"] += 1

        period_label = f" ({period})" if period else " (all time)"
        console.print(f"\n[bold]Cost Report{period_label}[/bold]\n")

        table = Table(show_header=True)
        table.add_column("Metric", style="cyan")
        table.add_column("Value", justify="right")

        table.add_row("Total Cost", f"${total_cost:.4f}")
        table.add_row("Total Tokens", _format_number(total_tokens))
        table.add_row("LLM Calls", str(len(records)))

        console.print(table)

        # The per-model table is only shown when there is at least one record.
        if model_costs:
            console.print("\n[bold]By Model:[/bold]")
            model_table = Table(show_header=True)
            model_table.add_column("Model", style="cyan")
            model_table.add_column("Cost", justify="right")
            model_table.add_column("Tokens", justify="right")
            model_table.add_column("Calls", justify="right")

            for model_name, stats in model_costs.items():
                model_table.add_row(
                    model_name,
                    f"${stats['cost_usd']:.4f}",
                    _format_number(stats["tokens"]),
                    str(stats["calls"]),
                )

            console.print(model_table)
    finally:
        # Always release the database handle, including on error paths.
        db.close()
268+
269+
270+
@stats_app.command("export")
def export_data(
    format: str = typer.Option(
        "csv", "--format", "-f", help="Output format: csv or json"
    ),
    output: str = typer.Option(
        ..., "--output", "-o", help="Output file path"
    ),
    task: Optional[int] = typer.Option(
        None, "--task", "-t", help="Filter by task ID"
    ),
):
    """Export usage data to CSV or JSON.

    Exports raw token usage records to a file for external analysis.
    Use --task to export records for a single task only.

    Examples:
        cf stats export --format csv --output tokens.csv
        cf stats export --format json --output tokens.json
        cf stats export --format csv --output task1.csv --task 1
    """
    from codeframe.lib.metrics_tracker import MetricsTracker

    # Map each supported format name to the function that writes it.
    exporters = {
        "csv": MetricsTracker.export_to_csv,
        "json": MetricsTracker.export_to_json,
    }

    # Resolve the exporter before opening the database, so an unsupported
    # format fails immediately instead of after every record was fetched.
    # The lookup is case-insensitive, so "CSV" and "Json" are accepted too.
    exporter = exporters.get(format.lower())
    if exporter is None:
        console.print(f"[red]Error:[/red] Unknown format '{format}'. Use 'csv' or 'json'.")
        raise typer.Exit(1)

    db = _get_db()
    try:
        if task is not None:
            # Single-task export goes through the batch query with one ID.
            records = db.get_batch_token_usage(task_ids=[task])
        else:
            records = db.get_workspace_token_usage()

        exporter(records, output)

        console.print(f"Exported {len(records)} records to {output}")
    finally:
        # Always release the database handle, including on error paths.
        db.close()

0 commit comments

Comments
 (0)