Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 94 additions & 23 deletions cmd/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"time"

"github.com/agenticgokit/agk/internal/audit"
"github.com/agenticgokit/agk/internal/pricing"
"github.com/agenticgokit/agk/internal/tui"
tea "github.com/charmbracelet/bubbletea"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -637,7 +638,6 @@ func parseTraceFile(runPath string) (TraceRun, error) {
}

durationSeconds := stats.LastSpan.Sub(stats.FirstSpan).Seconds()
estimatedCost := float64(stats.TotalTokens) * 0.00001 // Rough estimate

return TraceRun{
RunID: runID,
Expand All @@ -648,17 +648,22 @@ func parseTraceFile(runPath string) (TraceRun, error) {
Duration: durationSeconds,
SpanCount: stats.SpanCount,
LLMCalls: stats.LLMCalls,
TotalTokens: stats.TotalTokens,
EstimatedCost: estimatedCost,
TotalTokens: stats.Tokens(),
EstimatedCost: stats.Cost(),
}, nil
}

type RunStats struct {
SpanCount int
LLMCalls int
TotalTokens int
FirstSpan time.Time
LastSpan time.Time
SpanCount int
LLMCalls int
InputTokens int
OutputTokens int
TotalTokens int
Model string
DirectCost float64 // summed from agk.llm.cost.usd attributes, if emitted
HasDirectCost bool
FirstSpan time.Time
LastSpan time.Time
}

func (s *RunStats) Update(span map[string]interface{}) {
Expand All @@ -671,34 +676,83 @@ func (s *RunStats) Update(span map[string]interface{}) {
}
}

// Extract token count from attributes
// Extract token/model/cost data from attributes
if attrs, ok := span["Attributes"].([]interface{}); ok {
s.extractTokens(attrs)
s.extractAttrs(attrs)
}

// Extract start and end times
s.updateTimes(span)
}

func (s *RunStats) extractTokens(attrs []interface{}) {
// extractAttrs pulls token, model, and cost data from a span's attributes.
// It recognizes the AgenticGoKit observability keys (agk.llm.tokens.*, agk.llm.model,
// agk.llm.cost.usd) plus a few legacy aliases for backward compatibility.
func (s *RunStats) extractAttrs(attrs []interface{}) {
for _, attr := range attrs {
if attrMap, ok := attr.(map[string]interface{}); ok {
if key, ok := attrMap["Key"].(string); ok {
// Look for token-related attributes
if key == "llm.usage.completion_tokens" || key == "llm.completion_tokens" {
if val, ok := attrMap["Value"].(map[string]interface{}); ok {
if tokenVal, ok := val["Value"]; ok {
if tokenInt, err := toInt64(tokenVal); err == nil {
s.TotalTokens += int(tokenInt)
}
}
}
}
attrMap, ok := attr.(map[string]interface{})
if !ok {
continue
}
key, ok := attrMap["Key"].(string)
if !ok {
continue
}
val, ok := attrMap["Value"].(map[string]interface{})
if !ok {
continue
}
raw := val["Value"]

switch key {
case "agk.llm.tokens.input", "agk.llm.tokens.prompt",
"llm.usage.prompt_tokens", "llm.prompt_tokens":
if n, err := toInt64(raw); err == nil {
s.InputTokens += int(n)
}
case "agk.llm.tokens.output", "agk.llm.tokens.completion",
"llm.usage.completion_tokens", "llm.completion_tokens":
if n, err := toInt64(raw); err == nil {
s.OutputTokens += int(n)
}
case "agk.llm.tokens.total", "llm.usage.total_tokens":
if n, err := toInt64(raw); err == nil {
s.TotalTokens += int(n)
}
case "agk.llm.cost.usd":
if f, err := toFloat64(raw); err == nil {
s.DirectCost += f
s.HasDirectCost = true
}
case "agk.llm.model":
if str, ok := raw.(string); ok && str != "" {
s.Model = str
}
}
}
}

// Tokens reports the run's total token count. An explicitly recorded total
// (TotalTokens) wins whenever it is positive; otherwise the total is derived
// by adding InputTokens and OutputTokens.
func (s *RunStats) Tokens() int {
	total := s.TotalTokens
	if total <= 0 {
		total = s.InputTokens + s.OutputTokens
	}
	return total
}

// Cost reports the run's estimated cost in USD. A cost reported directly by
// the trace (HasDirectCost) takes precedence. Without one, the price table is
// consulted for the recorded model and input/output token counts; if the table
// has no price for the model (unknown or local models) the result is 0.
func (s *RunStats) Cost() float64 {
	if !s.HasDirectCost {
		estimate, priced := pricing.Estimate(s.Model, s.InputTokens, s.OutputTokens)
		if priced {
			return estimate
		}
		return 0
	}
	return s.DirectCost
}

func (s *RunStats) updateTimes(span map[string]interface{}) {
// Extract start and end times from span
// Format: "2026-01-19T18:36:38.897+09:00"
Expand Down Expand Up @@ -741,6 +795,23 @@ func toInt64(v interface{}) (int64, error) {
}
}

// toFloat64 converts a loosely typed attribute value to a finite float64.
//
// Accepted inputs are float64, int, int64, and strings in any syntax that
// strconv.ParseFloat understands. Every failure returns 0 together with a
// non-nil error: unsupported types, unparsable or out-of-range strings, and
// values that are NaN or ±Inf. Rejecting non-finite values keeps a single bad
// attribute from poisoning a sum built from the results.
func toFloat64(v interface{}) (float64, error) {
	var f float64
	switch val := v.(type) {
	case float64:
		f = val
	case int:
		f = float64(val)
	case int64:
		f = float64(val)
	case string:
		parsed, err := strconv.ParseFloat(val, 64)
		if err != nil {
			// ParseFloat can return ±Inf alongside a range error; never hand a
			// partial value back together with an error.
			return 0, fmt.Errorf("cannot convert %q to float64: %w", val, err)
		}
		f = parsed
	default:
		return 0, fmt.Errorf("cannot convert %T to float64", v)
	}

	// f*0 is 0 for every finite value and NaN for NaN and ±Inf, so this rejects
	// all non-finite values without needing the math package.
	if f*0 != 0 {
		return 0, fmt.Errorf("cannot convert %v to float64: value is not finite", v)
	}
	return f, nil
}

func getLatestRunID() string {
entries, err := os.ReadDir(runsDirName)
if err != nil {
Expand Down
83 changes: 83 additions & 0 deletions internal/pricing/pricing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Package pricing provides approximate USD cost estimates for LLM token usage.
//
// Prices are published list prices per 1,000,000 tokens and are intended for
// rough cost reporting in traces, not billing. They drift over time; update the
// table as providers change pricing. Local models (Ollama, etc.) are treated as
// free and intentionally absent from the table.
package pricing

import "strings"

// ModelPrice holds one model's price in USD per 1,000,000 tokens.
// InputPer1M is the rate for input tokens and OutputPer1M the rate for output
// tokens; the two are priced separately.
type ModelPrice struct {
	InputPer1M, OutputPer1M float64
}

// table maps a normalized model key to its price in USD per 1,000,000 tokens.
// Lookups use exact match first, then the longest matching prefix, so
// dated/variant model ids (e.g. "gpt-4o-2024-08-06", "claude-sonnet-4-20250514")
// resolve to their base model.
//
// Keys must be lowercase and carry no provider prefix: model ids are lowercased
// and stripped of everything up to the last "/" before being compared against
// this table, so a key written in any other form can never match.
var table = map[string]ModelPrice{
// OpenAI
"gpt-4o": {InputPer1M: 2.50, OutputPer1M: 10.00},
"gpt-4o-mini": {InputPer1M: 0.15, OutputPer1M: 0.60},
"gpt-4-turbo": {InputPer1M: 10.00, OutputPer1M: 30.00},
"gpt-4": {InputPer1M: 30.00, OutputPer1M: 60.00},
"gpt-3.5-turbo": {InputPer1M: 0.50, OutputPer1M: 1.50},
"o1": {InputPer1M: 15.00, OutputPer1M: 60.00},
"o1-mini": {InputPer1M: 1.10, OutputPer1M: 4.40},
"o3-mini": {InputPer1M: 1.10, OutputPer1M: 4.40},

// Anthropic
"claude-3-5-sonnet": {InputPer1M: 3.00, OutputPer1M: 15.00},
"claude-3-5-haiku": {InputPer1M: 0.80, OutputPer1M: 4.00},
"claude-3-opus": {InputPer1M: 15.00, OutputPer1M: 75.00},
"claude-3-sonnet": {InputPer1M: 3.00, OutputPer1M: 15.00},
"claude-3-haiku": {InputPer1M: 0.25, OutputPer1M: 1.25},
"claude-sonnet-4": {InputPer1M: 3.00, OutputPer1M: 15.00},
"claude-opus-4": {InputPer1M: 15.00, OutputPer1M: 75.00},
"claude-haiku-4": {InputPer1M: 1.00, OutputPer1M: 5.00},
}

// Estimate prices a token usage against the built-in price table.
//
// When model resolves to a table entry it returns the approximate USD cost and
// true. When the model cannot be resolved (unknown or local models) it returns
// (0, false), leaving it to the caller to decide how an unpriced run is shown.
func Estimate(model string, inputTokens, outputTokens int) (float64, bool) {
	if price, found := lookup(model); found {
		// Token counts are scaled to millions because prices are per 1M tokens.
		return float64(inputTokens)/1e6*price.InputPer1M + float64(outputTokens)/1e6*price.OutputPer1M, true
	}
	return 0, false
}

// lookup resolves a model id to its entry in the price table.
//
// The id is normalized first; an empty id is never found. An exact table hit
// wins. Otherwise the longest table key that prefixes the id at a separator
// boundary ('-', ':', '@' or '_') is used, so dated or tagged variants such as
// "gpt-4o-2024-08-06" or "gpt-4o:latest" resolve to their base model.
//
// The boundary requirement keeps distinct models that merely share leading
// characters (for example "gpt-4.1" versus "gpt-4") from inheriting the wrong
// price; such ids are reported as not found instead.
func lookup(model string) (ModelPrice, bool) {
	key := normalize(model)
	if key == "" {
		return ModelPrice{}, false
	}
	if p, ok := table[key]; ok {
		return p, true
	}

	var best string
	for k := range table {
		// Skip keys that cannot beat the current best, keys that are not
		// strictly shorter than the id, and keys that are not a prefix of it.
		if len(k) <= len(best) || len(k) >= len(key) || !strings.HasPrefix(key, k) {
			continue
		}
		// Accept the prefix only when the id continues with a separator.
		switch key[len(k)] {
		case '-', ':', '@', '_':
			best = k
		}
	}
	if best == "" {
		return ModelPrice{}, false
	}
	return table[best], true
}

// normalize canonicalizes a model id for table lookups: surrounding whitespace
// is trimmed, letters are lowercased, and everything up to and including the
// last "/" is dropped (e.g. "openai/gpt-4o" -> "gpt-4o").
func normalize(model string) string {
	id := strings.TrimSpace(model)
	id = strings.ToLower(id)
	// LastIndex yields -1 when there is no "/", so the slice then starts at 0
	// and the id is kept whole.
	return id[strings.LastIndex(id, "/")+1:]
}
51 changes: 51 additions & 0 deletions internal/pricing/pricing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package pricing

import "testing"

// TestEstimate checks Estimate across exact model ids, dated/variant ids that
// resolve through prefix matching, provider-prefixed and mixed-case ids, and
// unknown or empty models that must come back unmatched with zero cost.
func TestEstimate(t *testing.T) {
	// Costs are floating-point, so every case is compared within this tolerance.
	const tolerance = 1e-9

	tests := []struct {
		name    string
		model   string
		in      int
		out     int
		want    float64
		matched bool
	}{
		{name: "gpt-4o exact", model: "gpt-4o", in: 1_000_000, out: 1_000_000, want: 12.50, matched: true},
		{name: "gpt-4o-mini", model: "gpt-4o-mini", in: 1_000_000, out: 0, want: 0.15, matched: true},
		{name: "gpt-4o dated suffix", model: "gpt-4o-2024-08-06", in: 1_000_000, out: 0, want: 2.50, matched: true},
		{name: "longest prefix prefers mini", model: "gpt-4o-mini-2024-07-18", in: 1_000_000, out: 0, want: 0.15, matched: true},
		{name: "provider prefix stripped", model: "openai/gpt-4o", in: 0, out: 1_000_000, want: 10.00, matched: true},
		{name: "claude sonnet 4 dated", model: "claude-sonnet-4-20250514", in: 1_000_000, out: 0, want: 3.00, matched: true},
		{name: "case insensitive", model: "GPT-4O", in: 1_000_000, out: 0, want: 2.50, matched: true},
		{name: "zero tokens still matched", model: "gpt-4o", in: 0, out: 0, want: 0, matched: true},
		{name: "local model unknown", model: "llama3.2", in: 1_000_000, out: 1_000_000, want: 0, matched: false},
		{name: "empty model", model: "", in: 100, out: 100, want: 0, matched: false},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got, matched := Estimate(tc.model, tc.in, tc.out)
			if matched != tc.matched {
				t.Fatalf("matched = %v, want %v", matched, tc.matched)
			}
			if diff := got - tc.want; diff > tolerance || diff < -tolerance {
				t.Fatalf("cost = %v, want %v", got, tc.want)
			}
		})
	}
}

// TestEstimatePartialTokens verifies that token counts well below one million
// are priced proportionally to the per-million rates.
func TestEstimatePartialTokens(t *testing.T) {
	// gpt-4o with 1,500 input and 500 output tokens:
	//   input:  1500/1e6 *  2.50 = 0.00375
	//   output:  500/1e6 * 10.00 = 0.00500
	//   total:                     0.00875
	const want = 0.00875

	got, matched := Estimate("gpt-4o", 1500, 500)
	if !matched {
		t.Fatal("expected gpt-4o to match")
	}
	if delta := got - want; delta < -1e-9 || delta > 1e-9 {
		t.Fatalf("cost = %v, want %v", got, want)
	}
}
Loading