
Commit 0156893

Merge pull request #1357 from MervinPraison/claude/issue-1356-20260410-1554
feat: Add --metrics-json flag for structured CLI cost output
2 parents 6ea089d + a2edba0 commit 0156893
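
With the new flag set, the CLI prints a single JSON object to stdout alongside the normal result. A representative line, with illustrative values (the keys are the ones assembled in the diff below; the model name here is made up):

{"cost_usd": 0.000123, "tokens_in": 42, "tokens_out": 17, "model": "gpt-4o-mini", "request_count": 1}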

2 files changed: 184 additions & 0 deletions

src/praisonai/praisonai/cli/main.py (29 additions & 0 deletions)
@@ -4,6 +4,7 @@
 import argparse
 import warnings
 import os
+import json

 # Suppress Pydantic serialization warnings from LiteLLM BEFORE any imports
 # These warnings occur when LiteLLM's response objects have field mismatches
@@ -963,6 +964,7 @@ def parse_args(self):

         # Metrics - token/cost tracking
         parser.add_argument("--metrics", action="store_true", help="Display token usage and cost metrics")
+        parser.add_argument("--metrics-json", action="store_true", help="Output structured cost and token data as JSON")

         # Image Description (Vision) - analyze existing images
         parser.add_argument("--image", type=str, help="Path to image file for vision-based description/analysis")
@@ -4753,6 +4755,33 @@ def level_based_approve(function_name, arguments, risk_level):
             if hasattr(self, 'args') and getattr(self.args, 'save', False):
                 self._save_output(prompt, result)

+            # Metrics JSON - Output structured cost data
+            if hasattr(self, 'args') and getattr(self.args, 'metrics_json', False):
+                try:
+                    from .features.metrics import MetricsHandler
+                    _mh = MetricsHandler(verbose=getattr(self.args, 'verbose', False))
+                    # Extract from final_agent if it was used, otherwise from original agent
+                    active_agent = final_agent if 'final_agent' in locals() else agent
+                    agent_metrics = _mh.extract_metrics_from_agent(active_agent)
+                    # Resolve model name: prefer what the agent reported, fall back to config
+                    model_name = agent_metrics.get('model')
+                    if not model_name:
+                        model_name = agent_config.get('llm', 'unknown')
+                        if isinstance(model_name, dict):
+                            model_name = model_name.get('model', 'unknown')
+                    metrics_out = {
+                        "cost_usd": agent_metrics.get('cost', 0.0),
+                        "tokens_in": agent_metrics.get('prompt_tokens', 0),
+                        "tokens_out": agent_metrics.get('completion_tokens', 0),
+                        "model": model_name or 'unknown',
+                        "request_count": agent_metrics.get('llm_calls', 0),
+                    }
+                    print(json.dumps(metrics_out))
+                except Exception as exc:
+                    print(f"[metrics-json] warning: could not extract metrics: {exc}", file=sys.stderr)
+                    # CRITICAL: Always emit JSON when --metrics-json is set
+                    print(json.dumps({"cost_usd": 0.0, "tokens_in": 0, "tokens_out": 0, "model": "unknown", "request_count": 0}))
+
             return result
         elif CREWAI_AVAILABLE:
             from crewai import Agent, Task, Crew
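
The except branch deliberately prints a zeroed JSON object, so a caller gets exactly one metrics line per run even when extraction fails. A minimal sketch of a wrapper consuming this output, assuming the praisonai entry point is on PATH and that the metrics object is the last JSON-parseable line on stdout (assumptions of this sketch, not guarantees of the commit):

import json
import subprocess

# Run a direct prompt with --metrics-json and capture stdout (hypothetical prompt).
proc = subprocess.run(
    ["praisonai", "summarise the release notes", "--metrics-json"],
    capture_output=True, text=True, check=False,
)

# The agent's normal output precedes the metrics line, so scan from the end.
metrics = None
for line in reversed(proc.stdout.splitlines()):
    try:
        metrics = json.loads(line)
        break
    except json.JSONDecodeError:
        continue

if metrics is not None:
    print(f"{metrics['model']}: ${metrics['cost_usd']:.6f} "
          f"({metrics['tokens_in']} in / {metrics['tokens_out']} out, "
          f"{metrics['request_count']} calls)")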
New test file: 155 additions & 0 deletions

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
Unit tests for --metrics-json CLI flag.

These tests are pure unit tests: no network, no LLM calls, no side effects.
They validate argument parsing and the JSON output assembly logic in
handle_direct_prompt().
"""

import argparse
import json

import pytest


def _get_metrics_json_parser():
    """Minimal argument parser that mirrors the relevant CLI args."""
    parser = argparse.ArgumentParser(description="praisonAI CLI")
    parser.add_argument("--metrics", action="store_true")
    parser.add_argument("--metrics-json", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("command", nargs="?")
    return parser


def _build_metrics_out(agent_metrics, agent_config):
    """
    Mirror of the JSON-assembly block in handle_direct_prompt().

    Centralized here so tests can validate logic without importing main.py.
    """
    model_name = agent_metrics.get("model")
    if not model_name:
        model_name = agent_config.get("llm", "unknown")
        if isinstance(model_name, dict):
            model_name = model_name.get("model", "unknown")
    return {
        "cost_usd": agent_metrics.get("cost", 0.0),
        "tokens_in": agent_metrics.get("prompt_tokens", 0),
        "tokens_out": agent_metrics.get("completion_tokens", 0),
        "model": model_name or "unknown",
        "request_count": agent_metrics.get("llm_calls", 0),
    }


class TestMetricsJsonArgParsing:
    """Argument-parsing behaviour for --metrics-json."""

    def test_flag_stored_as_metrics_json(self):
        """--metrics-json is stored as metrics_json (underscore) on the namespace."""
        args = _get_metrics_json_parser().parse_args(["--metrics-json", "task"])
        assert args.metrics_json is True
        assert args.command == "task"

    def test_default_false_when_absent(self):
        """--metrics-json defaults to False when not supplied."""
        args = _get_metrics_json_parser().parse_args(["task"])
        assert args.metrics_json is False

    def test_independent_from_metrics_flag(self):
        """--metrics and --metrics-json are independent boolean flags."""
        args = _get_metrics_json_parser().parse_args(["--metrics", "task"])
        assert args.metrics is True
        assert args.metrics_json is False

        args2 = _get_metrics_json_parser().parse_args(["--metrics-json", "task"])
        assert args2.metrics is False
        assert args2.metrics_json is True

    def test_both_flags_together(self):
        """Both --metrics and --metrics-json can be set simultaneously."""
        args = _get_metrics_json_parser().parse_args(["--metrics", "--metrics-json", "task"])
        assert args.metrics is True
        assert args.metrics_json is True


class TestMetricsJsonOutput:
    """JSON assembly logic for --metrics-json output."""

    def test_output_has_required_keys(self):
        """Emitted JSON contains exactly the five required keys."""
        payload = _build_metrics_out(
            {"prompt_tokens": 42, "completion_tokens": 17, "cost": 0.000123, "model": "test-model"},
            {"llm": "test-model"},
        )
        assert set(payload.keys()) == {"cost_usd", "tokens_in", "tokens_out", "model", "request_count"}

    def test_tokens_mapped_from_correct_keys(self):
        """prompt_tokens → tokens_in, completion_tokens → tokens_out."""
        payload = _build_metrics_out(
            {"prompt_tokens": 42, "completion_tokens": 17},
            {},
        )
        assert payload["tokens_in"] == 42
        assert payload["tokens_out"] == 17

    def test_cost_preserved(self):
        """cost value is preserved as cost_usd."""
        payload = _build_metrics_out({"cost": 0.000123}, {})
        assert abs(payload["cost_usd"] - 0.000123) < 1e-9

    def test_request_count_defaults_to_zero(self):
        """request_count defaults to 0 when llm_calls is absent."""
        payload = _build_metrics_out({}, {})
        assert payload["request_count"] == 0

    def test_request_count_from_llm_calls(self):
        """request_count is taken from llm_calls when present."""
        payload = _build_metrics_out({"llm_calls": 3}, {})
        assert payload["request_count"] == 3

    def test_model_from_agent_metrics(self):
        """Model is taken from agent_metrics['model'] when available."""
        payload = _build_metrics_out({"model": "test-model-from-agent"}, {"llm": "config-model"})
        assert payload["model"] == "test-model-from-agent"

    def test_model_fallback_to_config_string(self):
        """Falls back to agent_config['llm'] string when agent_metrics has no model."""
        payload = _build_metrics_out({}, {"llm": "config-llm-string"})
        assert payload["model"] == "config-llm-string"

    def test_model_fallback_to_config_dict(self):
        """When agent_config['llm'] is a dict, extracts nested 'model' key."""
        payload = _build_metrics_out({}, {"llm": {"model": "nested-model", "temperature": 0.5}})
        assert payload["model"] == "nested-model"

    def test_model_unknown_when_no_info(self):
        """Falls back to 'unknown' when neither agent nor config provides a model."""
        payload = _build_metrics_out({}, {})
        assert payload["model"] == "unknown"

    def test_output_is_json_serialisable(self):
        """The output dict round-trips through JSON without error."""
        payload = _build_metrics_out(
            {"prompt_tokens": 10, "completion_tokens": 5, "cost": 0.00001},
            {"llm": "test-model"},
        )
        assert json.loads(json.dumps(payload)) == payload

    def test_regression_wrong_keys_produce_zeros(self):
        """
        Regression guard: the old code used 'input_tokens'/'output_tokens' which
        are never populated by MetricsHandler.extract_metrics_from_agent().
        These should always be absent; the correct keys are prompt/completion_tokens.
        """
        agent_metrics = {"prompt_tokens": 100, "completion_tokens": 50}
        # Wrong keys (old bug):
        assert agent_metrics.get("input_tokens", 0) == 0
        assert agent_metrics.get("output_tokens", 0) == 0
        # Correct keys (fixed):
        assert agent_metrics.get("prompt_tokens", 0) == 100
        assert agent_metrics.get("completion_tokens", 0) == 50


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
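
Because every test exercises either argparse or the pure _build_metrics_out mirror, the suite needs no API keys or network access. The __main__ hook lets the file run standalone under plain python; a normal pytest invocation collects it as usual.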
