@@ -679,3 +679,231 @@ def test_skips_error_reports(self):
679679 total = reporter .total ()
680680 assert total .cost is None
681681 assert isinstance (total , Usage )
682+
def test_by_task_accumulates_repeats(self):
    """Verify that by_task folds reports sharing a task_id into one entry.

    Two reports for "task_1" (repeat 0 and repeat 1) are loaded; the
    per-task view must contain a single entry whose cost and input token
    count are the sums over both repeats.
    """

    def llm_record(cost, input_tokens, output_tokens, total_tokens):
        # Serialized usage record for model "m"; only these four figures
        # differ between the two repeats, everything else is constant.
        return {
            "cost": cost,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_tokens": total_tokens,
            "cached_input_tokens": 0,
            "cache_creation_input_tokens": 0,
            "reasoning_tokens": 0,
            "audio_tokens": 0,
            "units": {},
            "provider": None,
            "category": "models",
            "component_name": "m",
            "kind": "llm",
        }

    per_repeat_figures = [
        (0.10, 100, 50, 150),
        (0.20, 200, 100, 300),
    ]
    reports = [
        {
            "task_id": "task_1",
            "repeat_idx": repeat_idx,
            "usage": {"models": {"m": llm_record(*figures)}},
        }
        for repeat_idx, figures in enumerate(per_repeat_figures)
    ]

    per_task = UsageReporter.from_reports(reports).by_task()

    assert len(per_task) == 1
    merged = per_task["task_1"]
    assert merged.cost == pytest.approx(0.30)
    assert merged.input_tokens == 300
739+
def test_plain_usage_fallback(self):
    """Verify that a component without token fields totals to a plain Usage.

    The single component is a tool record that carries cost and units but
    no token counts, so the aggregated total must be a Usage instance that
    is not a TokenUsage.
    """
    # NOTE(review): the fallback is presumably implemented by
    # _usage_from_dict, which is not visible from this test - confirm.
    tool_record = {
        "cost": 0.05,
        "units": {"api_calls": 3},
        "provider": None,
        "category": "tools",
        "component_name": "my_tool",
        "kind": "tool",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {"tools": {"my_tool": tool_record}},
    }

    aggregate = UsageReporter.from_reports([single_report]).total()

    assert aggregate.cost == pytest.approx(0.05)
    assert isinstance(aggregate, Usage)
    assert not isinstance(aggregate, TokenUsage)
766+
def test_metadata_key_skipped(self):
    """Verify that a 'metadata' entry in a usage dict is not a component.

    The usage dict holds a 'metadata' mapping next to one model record;
    the total must reflect the model record alone.
    """
    model_record = {
        "cost": 0.10,
        "input_tokens": 50,
        "output_tokens": 25,
        "total_tokens": 75,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "m",
        "kind": "llm",
    }
    usage_payload = {
        "metadata": {"timestamp": "2025-01-01", "total_components": 1},
        "models": {"m": model_record},
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": usage_payload,
    }

    aggregate = UsageReporter.from_reports([single_report]).total()

    # Figures must equal the model record's own; metadata adds nothing.
    assert aggregate.cost == pytest.approx(0.10)
    assert aggregate.input_tokens == 50
801+
def test_skips_component_with_error(self):
    """Verify that an error-only component is ignored while others count.

    'good_model' carries a full usage record, 'bad_model' carries only
    error details; the total must match the good model's figures.
    """
    healthy_record = {
        "cost": 0.10,
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "good_model",
        "kind": "llm",
    }
    # Shape of a component whose usage could not be gathered.
    failed_record = {
        "error": "Failed to gather usage",
        "error_type": "RuntimeError",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {
            "models": {
                "good_model": healthy_record,
                "bad_model": failed_record,
            }
        },
    }

    aggregate = UsageReporter.from_reports([single_report]).total()

    assert aggregate.cost == pytest.approx(0.10)
    assert aggregate.input_tokens == 100
838+
def test_environment_direct_usage(self):
    """Verify that a usage record stored directly under a category is parsed.

    'environment' maps straight to a record with a 'cost' field (no
    per-component nesting) and sits beside a nested model record; the total
    cost must include both.
    """
    environment_record = {
        "cost": 0.05,
        "units": {"steps": 10},
        "provider": None,
        "category": "environment",
        "component_name": "env",
        "kind": "env",
    }
    model_record = {
        "cost": 0.10,
        "input_tokens": 100,
        "output_tokens": 50,
        "total_tokens": 150,
        "cached_input_tokens": 0,
        "cache_creation_input_tokens": 0,
        "reasoning_tokens": 0,
        "audio_tokens": 0,
        "units": {},
        "provider": None,
        "category": "models",
        "component_name": "m",
        "kind": "llm",
    }
    single_report = {
        "task_id": "task_1",
        "repeat_idx": 0,
        "usage": {
            "environment": environment_record,
            "models": {"m": model_record},
        },
    }

    aggregate = UsageReporter.from_reports([single_report]).total()

    # 0.05 from the environment record plus 0.10 from the model record.
    assert aggregate.cost == pytest.approx(0.15)
878+
879+
# =============================================================================
# StaticPricingCalculator — Utility Methods
# =============================================================================
883+
884+
class TestStaticPricingCalculatorUtilities:
    """Cover StaticPricingCalculator's add_model, models and gather_config."""

    def test_add_model(self):
        """A model registered through add_model can be priced afterwards."""
        calculator = StaticPricingCalculator({})
        calculator.add_model("new-model", {"input": 0.01, "output": 0.02})

        sample = TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150)

        # NOTE(review): 2.00 is consistent with per-token rates
        # (100 * 0.01 + 50 * 0.02) - confirm against the calculator.
        assert calculator.calculate_cost(sample, "new-model") == pytest.approx(2.00)

    def test_models_property(self):
        """The models property exposes every configured model name."""
        pricing_table = {
            "model-a": {"input": 0.01, "output": 0.02},
            "model-b": {"input": 0.001, "output": 0.002},
        }
        calculator = StaticPricingCalculator(pricing_table)

        # Sorted so the check does not depend on the property's ordering.
        assert sorted(calculator.models) == ["model-a", "model-b"]

    def test_gather_config(self):
        """gather_config reports the calculator type and its pricing table."""
        pricing_table = {"model-a": {"input": 0.01, "output": 0.02}}

        snapshot = StaticPricingCalculator(pricing_table).gather_config()

        assert snapshot["type"] == "StaticPricingCalculator"
        assert snapshot["pricing"] == pricing_table
0 commit comments