@@ -256,3 +256,230 @@ def worker(worker_id: int):
256256 for worker_id , traces in results .items ():
257257 assert f"agent_{ worker_id } " in traces ["agents" ]
258258 assert len (traces ["agents" ]) == 1
259+
260+
261+ # ==================== Usage Tracking Tests ====================
262+
263+
class MockUsageComponent(TraceableMixin):
    """Traceable test double that reports a fixed, preconfigured usage record.

    The class exposes a ``gather_usage()`` method but derives only from
    ``TraceableMixin``; it does not inherit ``UsageTrackableMixin``.
    """

    def __init__(self, name: str, cost: float = 0.0, input_tokens: int = 0, output_tokens: int = 0):
        """Store the name and the usage figures later returned by ``gather_usage``."""
        super().__init__()
        self._name, self._cost = name, cost
        self._input_tokens, self._output_tokens = input_tokens, output_tokens

    def gather_traces(self) -> Dict[str, Any]:
        """Return a minimal trace payload containing only the component name."""
        return dict(name=self._name)

    def gather_usage(self):
        """Build a ``TokenUsage`` from the stored figures; total is input + output."""
        # Imported lazily, as in the original, so the dependency is only
        # resolved when usage is actually gathered.
        from maseval.core.usage import TokenUsage

        prompt_tokens, completion_tokens = self._input_tokens, self._output_tokens
        return TokenUsage(
            cost=self._cost,
            input_tokens=prompt_tokens,
            output_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
285+
286+
class MockBrokenUsageComponent(TraceableMixin):
    """Traceable test double whose ``gather_usage`` always raises ``RuntimeError``."""

    def __init__(self):
        """Run the parent initializer; the component holds no state of its own."""
        super().__init__()

    def gather_traces(self) -> Dict[str, Any]:
        """Return an empty trace payload."""
        return dict()

    def gather_usage(self):
        """Simulate a failing usage collection by raising unconditionally."""
        failure = RuntimeError("Usage collection failed")
        raise failure
298+
299+
# Components below inherit both TraceableMixin and UsageTrackableMixin
301+ from maseval .core .usage import UsageTrackableMixin
302+
303+
class UsageAwareComponent(TraceableMixin, UsageTrackableMixin):
    """Test double that is both traceable and usage-trackable.

    Reports a fixed usage record built from the figures given at construction.
    """

    def __init__(self, cost: float = 0.0, input_tokens: int = 0, output_tokens: int = 0):
        """Store the usage figures later returned by ``gather_usage``."""
        # NOTE(review): only TraceableMixin's initializer is invoked explicitly
        # here; confirm UsageTrackableMixin needs no separate setup.
        TraceableMixin.__init__(self)
        self._cost = cost
        self._input_tokens, self._output_tokens = input_tokens, output_tokens

    def gather_traces(self) -> Dict[str, Any]:
        """Return a fixed trace payload marking the component as traced."""
        return dict(traced=True)

    def gather_usage(self):
        """Build a ``TokenUsage`` from the stored figures; total is input + output."""
        from maseval.core.usage import TokenUsage

        prompt_tokens, completion_tokens = self._input_tokens, self._output_tokens
        return TokenUsage(
            cost=self._cost,
            input_tokens=prompt_tokens,
            output_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
324+
325+
class BrokenUsageComponent(TraceableMixin, UsageTrackableMixin):
    """Traceable, usage-trackable test double whose ``gather_usage`` always raises."""

    def __init__(self):
        """Run ``TraceableMixin``'s initializer; the component holds no state of its own."""
        TraceableMixin.__init__(self)

    def gather_traces(self) -> Dict[str, Any]:
        """Return an empty trace payload."""
        return dict()

    def gather_usage(self):
        """Simulate a failing usage collection by raising ``RuntimeError``."""
        failure = RuntimeError("Usage collection failed")
        raise failure
337+
338+
@pytest.mark.core
class TestRegistryUsageCollection:
    """Tests for usage tracking through the component registry.

    Each test builds a fresh ``ComponentRegistry``, registers one or more test
    doubles and inspects either the dict returned by ``collect_usage()`` or the
    accumulated ``total_usage`` / ``usage_by_component`` properties.

    Float-valued costs are compared with ``pytest.approx`` throughout; token
    counts are integers and are compared exactly.
    """

    def test_register_usage_trackable_component(self):
        """UsageTrackableMixin component is registered in the usage registry."""
        registry = ComponentRegistry()
        component = UsageAwareComponent(cost=0.05, input_tokens=100, output_tokens=50)

        registry.register("models", "main_model", component)

        # The usage registry is keyed by "<category>:<name>" and stores the
        # very same object that was registered.
        assert "models:main_model" in registry._usage_registry
        assert registry._usage_registry["models:main_model"] is component

    def test_non_usage_component_not_in_usage_registry(self):
        """Components without UsageTrackableMixin are NOT in the usage registry."""
        registry = ComponentRegistry()
        component = MockTraceableComponent("test")

        registry.register("agents", "my_agent", component)

        assert "agents:my_agent" in registry._trace_registry
        assert "agents:my_agent" not in registry._usage_registry

    def test_collect_usage_basic(self):
        """collect_usage returns structured dict with usage from registered components."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=500, output_tokens=200)
        registry.register("models", "main_model", model)

        usage = registry.collect_usage()

        assert "metadata" in usage
        assert "models" in usage
        assert "main_model" in usage["models"]

        model_usage = usage["models"]["main_model"]
        assert model_usage["cost"] == pytest.approx(0.10)
        assert model_usage["input_tokens"] == 500
        assert model_usage["output_tokens"] == 200
        assert model_usage["total_tokens"] == 700

    def test_collect_usage_multiple_components(self):
        """Multiple components across categories are all collected."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=500, output_tokens=200)
        tool = UsageAwareComponent(cost=0.05, input_tokens=0, output_tokens=0)

        registry.register("models", "main_model", model)
        registry.register("tools", "search_tool", tool)

        usage = registry.collect_usage()

        assert "main_model" in usage["models"]
        assert "search_tool" in usage["tools"]
        assert usage["models"]["main_model"]["cost"] == pytest.approx(0.10)
        assert usage["tools"]["search_tool"]["cost"] == pytest.approx(0.05)

    def test_collect_usage_injects_grouping_fields(self):
        """Registry injects category and component_name into usage records."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=100, output_tokens=50)
        registry.register("models", "main_model", model)

        usage = registry.collect_usage()

        model_usage = usage["models"]["main_model"]
        assert model_usage["category"] == "models"
        assert model_usage["component_name"] == "main_model"

    def test_total_usage_accumulates(self):
        """total_usage property reflects accumulated usage across collect_usage calls."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=100, output_tokens=50)
        registry.register("models", "main_model", model)

        # First collection
        registry.collect_usage()
        total1 = registry.total_usage
        assert total1.cost == pytest.approx(0.10)

        # Clear and re-register (simulates next repetition)
        registry.clear()
        model2 = UsageAwareComponent(cost=0.20, input_tokens=200, output_tokens=100)
        registry.register("models", "main_model", model2)

        # Second collection: the running total is the sum of both repetitions.
        registry.collect_usage()
        total2 = registry.total_usage
        assert total2.cost == pytest.approx(0.30)

    def test_usage_by_component_accumulates(self):
        """usage_by_component accumulates per key across repetitions."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=100, output_tokens=50)
        registry.register("models", "main_model", model)
        registry.collect_usage()

        # Clear and re-register for second repetition
        registry.clear()
        model2 = UsageAwareComponent(cost=0.20, input_tokens=200, output_tokens=100)
        registry.register("models", "main_model", model2)
        registry.collect_usage()

        by_comp = registry.usage_by_component
        assert "models:main_model" in by_comp

        # Both repetitions used the same key, so their figures are summed.
        total = by_comp["models:main_model"]
        assert total.input_tokens == 300
        assert total.output_tokens == 150
        assert total.cost == pytest.approx(0.30)

    def test_usage_persists_across_clear(self):
        """clear() does NOT reset total_usage or usage_by_component."""
        registry = ComponentRegistry()
        model = UsageAwareComponent(cost=0.10, input_tokens=100, output_tokens=50)
        registry.register("models", "main_model", model)
        registry.collect_usage()

        # Clear only removes per-repetition state
        registry.clear()

        assert registry.total_usage.cost == pytest.approx(0.10)
        assert "models:main_model" in registry.usage_by_component

    def test_collect_usage_handles_error_gracefully(self):
        """If gather_usage raises, the error is captured in the usage dict."""
        registry = ComponentRegistry()
        broken = BrokenUsageComponent()
        registry.register("models", "bad_model", broken)

        # Must not propagate the RuntimeError raised by the component.
        usage = registry.collect_usage()

        assert "bad_model" in usage["models"]
        assert "error" in usage["models"]["bad_model"]
        assert "RuntimeError" in usage["models"]["bad_model"]["error_type"]

    def test_collect_usage_empty_registry(self):
        """collect_usage with no components returns empty structure."""
        registry = ComponentRegistry()
        usage = registry.collect_usage()

        assert usage["metadata"]["total_components"] == 0
        assert usage["models"] == {}
        assert usage["agents"] == {}
0 commit comments