Skip to content

Commit fbc3d2a

Browse files
committed
* fixed example: small changes to clean up cost tracking in the 5-a-day benchmark example
1 parent 8dc11ce commit fbc3d2a

2 files changed

Lines changed: 20 additions & 29 deletions

File tree

examples/five_a_day_benchmark/five_a_day_benchmark.py

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -954,14 +954,13 @@ def load_benchmark_data(
954954
benchmark = FiveADayBenchmark(
955955
callbacks=[logger],
956956
seed=args.seed, # Use benchmark's seeding system
957-
fail_on_setup_error=True,
958-
fail_on_task_error=True,
959-
fail_on_evaluation_error=True,
957+
fail_on_setup_error=False,
958+
fail_on_task_error=False,
959+
fail_on_evaluation_error=False,
960960
)
961961
results = benchmark.run(tasks=tasks, agent_data=agent_configs)
962962

963963
# --- Usage summary ---
964-
from collections import defaultdict
965964
from maseval import TokenUsage
966965

967966
def _fmt_usage(usage):
@@ -972,33 +971,17 @@ def _fmt_usage(usage):
972971
parts.append(f"units={dict(usage.units)}")
973972
return " ".join(parts)
974973

974+
reporter = UsageReporter.from_reports(results)
975+
975976
print("\n--- Usage Summary ---")
976-
total = benchmark.usage
977-
print(f"Total: {_fmt_usage(total)}")
978-
979-
# Group components by category
980-
if benchmark.usage_by_component:
981-
by_category: Dict[str, Dict[str, object]] = defaultdict(dict)
982-
for key, usage in benchmark.usage_by_component.items():
983-
category, name = key.split(":", 1)
984-
by_category[category][name] = usage
985-
986-
for category in ["agents", "models", "tools", "simulators", "callbacks"]:
987-
if category not in by_category:
988-
continue
989-
print(f"\n{category.capitalize()}:")
990-
for name, usage in by_category[category].items():
991-
print(f" {name:<35} {_fmt_usage(usage)}")
992-
993-
# Print any remaining categories not in the standard list
994-
for category, components in by_category.items():
995-
if category in {"agents", "models", "tools", "simulators", "callbacks"}:
996-
continue
997-
print(f"\n{category.capitalize()}:")
998-
for name, usage in components.items():
999-
print(f" {name:<35} {_fmt_usage(usage)}")
977+
print(f"Total: {_fmt_usage(reporter.total())}")
978+
979+
by_component = reporter.by_component()
980+
if by_component:
981+
print("\nBy component:")
982+
for key, usage in by_component.items():
983+
print(f" {key:<35} {_fmt_usage(usage)}")
1000984

1001-
reporter = UsageReporter.from_reports(results)
1002985
by_task = reporter.by_task()
1003986
if by_task:
1004987
print("\nPer-task:")

uv.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)