Skip to content

Commit 742ffb5

Browse files
committed
upgraded testing
1 parent 60f205a commit 742ffb5

1 file changed

Lines changed: 228 additions & 0 deletions

File tree

tests/test_core/test_usage.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,3 +679,231 @@ def test_skips_error_reports(self):
679679
total = reporter.total()
680680
assert total.cost is None
681681
assert isinstance(total, Usage)
682+
683+
def test_by_task_accumulates_repeats(self):
    """Two reports sharing one task_id yield a single by_task entry with summed usage."""

    def build_report(repeat_idx, cost, input_tokens, output_tokens):
        # One report for "task_1" whose only component is the LLM "m".
        return {
            "task_id": "task_1",
            "repeat_idx": repeat_idx,
            "usage": {
                "models": {
                    "m": {
                        "cost": cost,
                        "input_tokens": input_tokens,
                        "output_tokens": output_tokens,
                        "total_tokens": input_tokens + output_tokens,
                        "cached_input_tokens": 0,
                        "cache_creation_input_tokens": 0,
                        "reasoning_tokens": 0,
                        "audio_tokens": 0,
                        "units": {},
                        "provider": None,
                        "category": "models",
                        "component_name": "m",
                        "kind": "llm",
                    }
                }
            },
        }

    first_repeat = build_report(0, 0.10, 100, 50)
    second_repeat = build_report(1, 0.20, 200, 100)

    per_task = UsageReporter.from_reports([first_repeat, second_repeat]).by_task()

    # Both repeats belong to the same task, so only one key is expected.
    assert len(per_task) == 1
    assert per_task["task_1"].cost == pytest.approx(0.30)
    assert per_task["task_1"].input_tokens == 300
739+
740+
def test_plain_usage_fallback(self):
    """A component dict without token fields totals to a plain Usage, not a TokenUsage."""
    # Tool usage carries cost and units only -- no *_tokens keys at all.
    tool_usage = {
        "cost": 0.05,
        "units": {"api_calls": 3},
        "provider": None,
        "category": "tools",
        "component_name": "my_tool",
        "kind": "tool",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {"tools": {"my_tool": tool_usage}},
    }

    aggregated = UsageReporter.from_reports([single_report]).total()

    assert aggregated.cost == pytest.approx(0.05)
    assert isinstance(aggregated, Usage)
    assert not isinstance(aggregated, TokenUsage)
766+
767+
def test_metadata_key_skipped(self):
    """A top-level 'metadata' entry in the usage dict does not count as a component."""
    model_usage = {
        "cost": 0.10,
        "input_tokens": 50,
        "output_tokens": 25,
        "total_tokens": 75,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "m",
        "kind": "llm",
    }
    usage_section = {
        # Bookkeeping entry that sits alongside the real component categories.
        "metadata": {"timestamp": "2025-01-01", "total_components": 1},
        "models": {"m": model_usage},
    }
    single_report = {"task_id": "task_1", "repeat_idx": 0, "usage": usage_section}

    aggregated = UsageReporter.from_reports([single_report]).total()

    # The totals must equal the model's figures alone.
    assert aggregated.cost == pytest.approx(0.10)
    assert aggregated.input_tokens == 50
801+
802+
def test_skips_component_with_error(self):
    """An error-only component is ignored while its healthy sibling is still totalled."""
    healthy_component = {
        "cost": 0.10,
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "good_model",
        "kind": "llm",
    }
    # Holds only error details, no cost or token fields.
    failed_component = {
        "error": "Failed to gather usage",
        "error_type": "RuntimeError",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {
            "models": {
                "good_model": healthy_component,
                "bad_model": failed_component,
            }
        },
    }

    aggregated = UsageReporter.from_reports([single_report]).total()

    assert aggregated.cost == pytest.approx(0.10)
    assert aggregated.input_tokens == 100
838+
839+
def test_environment_direct_usage(self):
    """A usage dict placed directly under 'environment' is included in the total."""
    # Unlike "models", this category maps straight to a usage dict
    # rather than to a {component_name: usage} mapping.
    environment_usage = {
        "cost": 0.05,
        "units": {"steps": 10},
        "provider": None,
        "category": "environment",
        "component_name": "env",
        "kind": "env",
    }
    model_usage = {
        "cost": 0.10,
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "m",
        "kind": "llm",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {
            "environment": environment_usage,
            "models": {"m": model_usage},
        },
    }

    aggregated = UsageReporter.from_reports([single_report]).total()

    # 0.05 (environment) + 0.10 (model)
    assert aggregated.cost == pytest.approx(0.15)
878+
879+
880+
# =============================================================================
881+
# StaticPricingCalculator — Utility Methods
882+
# =============================================================================
883+
884+
885+
class TestStaticPricingCalculatorUtilities:
    """Covers the add_model, models and gather_config helpers of StaticPricingCalculator."""

    def test_add_model(self):
        """A model registered through add_model can be priced by calculate_cost."""
        calculator = StaticPricingCalculator({})
        calculator.add_model("new-model", {"input": 0.01, "output": 0.02})

        token_usage = TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150)

        # Expected value matches 100 * 0.01 + 50 * 0.02, i.e. rates are
        # presumably applied per token -- confirm against the calculator.
        assert calculator.calculate_cost(token_usage, "new-model") == pytest.approx(2.00)

    def test_models_property(self):
        """The models property exposes every configured model name."""
        price_table = {
            "model-a": {"input": 0.01, "output": 0.02},
            "model-b": {"input": 0.001, "output": 0.002},
        }
        calculator = StaticPricingCalculator(price_table)

        # Sorted so the check does not depend on the order models are returned in.
        assert sorted(calculator.models) == ["model-a", "model-b"]

    def test_gather_config(self):
        """gather_config reports the calculator type and the pricing it was built with."""
        price_table = {"model-a": {"input": 0.01, "output": 0.02}}

        gathered = StaticPricingCalculator(price_table).gather_config()

        assert gathered["type"] == "StaticPricingCalculator"
        assert gathered["pricing"] == price_table

0 commit comments

Comments
 (0)