Skip to content

Commit 4506a71

Browse files
committed
fixed formatting for notebooks
1 parent ffcc7f1 commit 4506a71

2 files changed

Lines changed: 122 additions & 165 deletions

File tree

examples/five_a_day_benchmark/five_a_day_benchmark.ipynb

Lines changed: 58 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@
141141
" task_indices: list[int] | None = None,\n",
142142
") -> tuple[TaskCollection, list[Dict[str, Any]]]:\n",
143143
" \"\"\"Load tasks and agent configurations.\n",
144-
" \n",
144+
"\n",
145145
" Args:\n",
146146
" config_type: 'single' or 'multi' agent configuration\n",
147147
" framework: Agent framework to use\n",
@@ -150,34 +150,34 @@
150150
" limit: Optional limit on number of tasks (None = all 5)\n",
151151
" seed: Random seed for reproducibility\n",
152152
" task_indices: Optional list of task indices to load (e.g., [0, 2, 4])\n",
153-
" \n",
153+
"\n",
154154
" Returns:\n",
155155
" Tuple of (TaskCollection, list of agent configs)\n",
156156
" \"\"\"\n",
157157
" data_dir = Path(\"examples/five_a_day_benchmark/data\")\n",
158-
" \n",
158+
"\n",
159159
" with open(data_dir / \"tasks.json\", \"r\") as f:\n",
160160
" tasks_raw = json.load(f)\n",
161161
" with open(data_dir / f\"{config_type}agent.json\", \"r\") as f:\n",
162162
" configs_raw = json.load(f)\n",
163-
" \n",
163+
"\n",
164164
" # Apply limit first\n",
165165
" if limit:\n",
166166
" tasks_raw = tasks_raw[:limit]\n",
167167
" configs_raw = configs_raw[:limit]\n",
168-
" \n",
168+
"\n",
169169
" # Then apply task_indices filter if specified\n",
170170
" if task_indices is not None:\n",
171171
" tasks_raw = [tasks_raw[i] for i in task_indices if i < len(tasks_raw)]\n",
172172
" configs_raw = [configs_raw[i] for i in task_indices if i < len(configs_raw)]\n",
173-
" \n",
173+
"\n",
174174
" tasks_data = []\n",
175175
" configs_data = []\n",
176-
" \n",
176+
"\n",
177177
" for task_dict, config in zip(tasks_raw, configs_raw):\n",
178178
" task_id = task_dict[\"metadata\"][\"task_id\"]\n",
179179
" task_dict[\"environment_data\"][\"agent_framework\"] = framework\n",
180-
" \n",
180+
"\n",
181181
" # Create Task object\n",
182182
" tasks_data.append(\n",
183183
" Task(\n",
@@ -187,18 +187,18 @@
187187
" metadata=task_dict[\"metadata\"],\n",
188188
" )\n",
189189
" )\n",
190-
" \n",
190+
"\n",
191191
" # Enrich config with framework and model info\n",
192192
" config[\"framework\"] = framework\n",
193193
" config[\"model_config\"] = {\"model_id\": model_id, \"temperature\": temperature}\n",
194-
" \n",
194+
"\n",
195195
" # Derive seeds for reproducibility\n",
196196
" if seed is not None:\n",
197197
" for agent_spec in config[\"agents\"]:\n",
198198
" agent_spec[\"seed\"] = derive_seed(seed, task_id, agent_spec[\"agent_id\"])\n",
199-
" \n",
199+
"\n",
200200
" configs_data.append(config)\n",
201-
" \n",
201+
"\n",
202202
" return TaskCollection(tasks_data), configs_data"
203203
]
204204
},
@@ -224,14 +224,15 @@
224224
"# Tell litellm to drop unsupported params (like 'seed' for Gemini)\n",
225225
"litellm.drop_params = True\n",
226226
"\n",
227+
"\n",
227228
"def get_model(model_id: str, temperature: float = 0.7, seed: int | None = None):\n",
228229
" \"\"\"Create a model instance compatible with smolagents.\n",
229-
" \n",
230+
"\n",
230231
" Args:\n",
231232
" model_id: Model name (e.g., 'gemini-2.5-flash', 'gpt-4')\n",
232233
" temperature: Randomness (0.0 = deterministic, 1.0 = creative)\n",
233234
" seed: Random seed for reproducible outputs (ignored for models that don't support it)\n",
234-
" \n",
235+
"\n",
235236
" Returns:\n",
236237
" LiteLLMModel configured for smolagents\n",
237238
" \"\"\"\n",
@@ -242,6 +243,7 @@
242243
" seed=seed, # Will be dropped by litellm for providers that don't support it\n",
243244
" )\n",
244245
"\n",
246+
"\n",
245247
"# Test the model factory\n",
246248
"model = get_model(\"gemini-2.5-flash\", temperature=0.7, seed=42)\n",
247249
"print(f\"Created model: {model.model_id}\")"
@@ -275,7 +277,7 @@
275277
"\n",
276278
"# Extract the first (and only) task and config\n",
277279
"task_0: Task = task_data[0]\n",
278-
"config_0: Dict[str,Any] = agent_configs[0]\n",
280+
"config_0: Dict[str, Any] = agent_configs[0]\n",
279281
"\n",
280282
"print(\"=\" * 60)\n",
281283
"print(\"TASK 0: Email & Banking\")\n",
@@ -309,7 +311,7 @@
309311
"print(f\"Agent Type: {config_0['agent_type']}\")\n",
310312
"print(f\"Primary Agent: {config_0['primary_agent_id']}\\n\")\n",
311313
"\n",
312-
"for i, agent_spec in enumerate(config_0['agents'], 1):\n",
314+
"for i, agent_spec in enumerate(config_0[\"agents\"], 1):\n",
313315
" print(f\"{i}. {agent_spec['agent_name']} (ID: {agent_spec['agent_id']})\")\n",
314316
" print(f\" Tools: {agent_spec['tools'] if agent_spec['tools'] else 'None (delegates only)'}\")\n",
315317
" print(f\" Role: {agent_spec['agent_instruction'][:80]}...\")\n",
@@ -384,23 +386,23 @@
384386
" specialist_agents = []\n",
385387
"\n",
386388
" temperature = agent_data[\"model_config\"][\"temperature\"]\n",
387-
" \n",
389+
"\n",
388390
" primary_agent_id = agent_data[\"primary_agent_id\"]\n",
389391
" agents_specs = agent_data[\"agents\"]\n",
390392
" all_tool_adapters = environment.get_tools()\n",
391-
" \n",
393+
"\n",
392394
" # Build specialists first\n",
393395
" specialist_agents = []\n",
394396
" for agent_spec in agents_specs:\n",
395397
" if agent_spec[\"agent_id\"] == primary_agent_id:\n",
396398
" continue\n",
397-
" \n",
399+
"\n",
398400
" seed = agent_spec.get(\"seed\")\n",
399401
" model = get_model(model_id, temperature, seed)\n",
400402
" spec_tool_adapters = filter_tool_adapters_by_prefix(all_tool_adapters, agent_spec[\"tools\"])\n",
401403
" spec_tools = [adapter.tool for adapter in spec_tool_adapters]\n",
402404
" spec_tools.append(FinalAnswerTool())\n",
403-
" \n",
405+
"\n",
404406
" specialist = ToolCallingAgent(\n",
405407
" model=model,\n",
406408
" tools=spec_tools,\n",
@@ -410,12 +412,12 @@
410412
" verbosity_level=0,\n",
411413
" )\n",
412414
" specialist_agents.append(specialist)\n",
413-
" \n",
415+
"\n",
414416
" # Build orchestrator\n",
415417
" primary_spec = next(a for a in agents_specs if a[\"agent_id\"] == primary_agent_id)\n",
416418
" primary_seed = primary_spec.get(\"seed\")\n",
417419
" primary_model = get_model(model_id, temperature, primary_seed)\n",
418-
" \n",
420+
"\n",
419421
" orchestrator = ToolCallingAgent(\n",
420422
" model=primary_model,\n",
421423
" tools=[FinalAnswerTool()],\n",
@@ -425,7 +427,7 @@
425427
" verbosity_level=0,\n",
426428
" )\n",
427429
"\n",
428-
" return [orchestrator], {agent.name: agent for agent in specialist_agents}\n"
430+
" return [orchestrator], {agent.name: agent for agent in specialist_agents}"
429431
]
430432
},
431433
{
@@ -475,17 +477,17 @@
475477
" \"\"\"Initialize environment state from task data.\"\"\"\n",
476478
" env_data = task_data[\"environment_data\"].copy()\n",
477479
" tool_names = env_data.get(\"tools\", [])\n",
478-
" \n",
480+
"\n",
479481
" # Create state objects (e.g., email inboxes, bank accounts)\n",
480482
" states = get_states(tool_names, env_data)\n",
481483
" env_data.update(states)\n",
482-
" \n",
484+
"\n",
483485
" return env_data\n",
484486
"\n",
485487
" def create_tools(self) -> list:\n",
486488
" \"\"\"Create and convert tools to framework-specific format.\"\"\"\n",
487489
" tools_list = []\n",
488-
" \n",
490+
"\n",
489491
" # Map tool names to their collection classes\n",
490492
" tool_mapping = {\n",
491493
" \"email\": (EmailToolCollection, lambda: (self.state[\"email_state\"],)),\n",
@@ -499,17 +501,17 @@
499501
" \"my_calendar_mcp\": (MCPCalendarToolCollection, lambda: (self.state[\"my_calendar_mcp_state\"],)),\n",
500502
" \"other_calendar_mcp\": (MCPCalendarToolCollection, lambda: (self.state[\"other_calendar_mcp_state\"],)),\n",
501503
" }\n",
502-
" \n",
504+
"\n",
503505
" for tool_name in self.state[\"tools\"]:\n",
504506
" if tool_name in tool_mapping:\n",
505507
" ToolClass, get_init_args = tool_mapping[tool_name]\n",
506508
" tool_instance = ToolClass(*get_init_args())\n",
507-
" \n",
509+
"\n",
508510
" # Get base tools and convert to framework format\n",
509511
" for base_tool in tool_instance.get_sub_tools():\n",
510512
" framework_tool = base_tool.to_smolagents()\n",
511513
" tools_list.append(framework_tool)\n",
512-
" \n",
514+
"\n",
513515
" return tools_list"
514516
]
515517
},
@@ -534,7 +536,7 @@
534536
"source": [
535537
"print(f\"{config_0['task_description']}\")\n",
536538
"\n",
537-
"for i, agent_spec in enumerate(config_0['agents'], 1):\n",
539+
"for i, agent_spec in enumerate(config_0[\"agents\"], 1):\n",
538540
" print(f\"{i}. {agent_spec['agent_name']} (ID: {agent_spec['agent_id']})\")\n",
539541
" print(f\" Tools: {agent_spec['tools'] if agent_spec['tools'] else 'None (delegates only)'}\")\n",
540542
" print(f\" Role: {agent_spec['agent_instruction'][:80]}...\")\n",
@@ -560,18 +562,20 @@
560562
"# Note: model_config is already set by load_benchmark_data()\n",
561563
"\n",
562564
"# Create environment from task data\n",
563-
"environment_0 = FiveADayEnvironment({\n",
564-
" \"environment_data\": task_0.environment_data,\n",
565-
" \"query\": task_0.query,\n",
566-
" \"evaluation_data\": task_0.evaluation_data,\n",
567-
" \"metadata\": task_0.metadata,\n",
568-
"})\n",
565+
"environment_0 = FiveADayEnvironment(\n",
566+
" {\n",
567+
" \"environment_data\": task_0.environment_data,\n",
568+
" \"query\": task_0.query,\n",
569+
" \"evaluation_data\": task_0.evaluation_data,\n",
570+
" \"metadata\": task_0.metadata,\n",
571+
" }\n",
572+
")\n",
569573
"\n",
570574
"# Build agents using the build_agents function\n",
571575
"agents_to_run, agents_to_monitor = build_agents(config_0, environment_0)\n",
572576
"\n",
573577
"print(f\"\\nBuilt Agents for Task: {task_0.metadata['task_id']}\")\n",
574-
"print(f\"{'='*60}\")\n",
578+
"print(f\"{'=' * 60}\")\n",
575579
"print(f\"\\nAgents to run: {[agent.name for agent in agents_to_run]}\")\n",
576580
"print(f\"Agents to monitor: {list(agents_to_monitor.keys())}\")\n",
577581
"\n",
@@ -580,7 +584,7 @@
580584
" print(f\"\\n Agent: {agent.name}\")\n",
581585
" # smolagents stores tools as a dict with string keys\n",
582586
" print(f\" Tools: {list(agent.tools.keys())}\")\n",
583-
" if hasattr(agent, 'managed_agents') and agent.managed_agents:\n",
587+
" if hasattr(agent, \"managed_agents\") and agent.managed_agents:\n",
584588
" # managed_agents is also a dict with string keys\n",
585589
" print(f\" Managed agents: {list(agent.managed_agents.keys())}\")\n",
586590
" for agent_name, managed in agent.managed_agents.items():\n",
@@ -623,25 +627,25 @@
623627
" \"evaluation_data\": task.evaluation_data,\n",
624628
" \"metadata\": task.metadata,\n",
625629
" }\n",
626-
" \n",
630+
"\n",
627631
" environment = FiveADayEnvironment(task_data)\n",
628-
" \n",
632+
"\n",
629633
" # Register all tools for tracing\n",
630634
" for tool_adapter in environment.get_tools():\n",
631635
" tool_name = getattr(tool_adapter, \"name\", str(type(tool_adapter).__name__))\n",
632636
" self.register(\"tools\", tool_name, tool_adapter)\n",
633-
" \n",
637+
"\n",
634638
" return environment\n",
635639
"\n",
636640
" def setup_agents(\n",
637641
" self, agent_data: Dict[str, Any], environment: Environment, task: Task, user=None\n",
638642
" ) -> tuple[list[SmolAgentAdapter], Dict[str, SmolAgentAdapter]]:\n",
639643
" \"\"\"Create multi-agent system with orchestrator and specialists.\"\"\"\n",
640644
" agents_to_run, agents_to_monitor = build_agents(agent_data, environment)\n",
641-
" \n",
645+
"\n",
642646
" # Create adapters for the primary agent(s) to run\n",
643647
" adapters_to_run = [SmolAgentAdapter(agent, agent.name) for agent in agents_to_run]\n",
644-
" \n",
648+
"\n",
645649
" # This ensures all agent traces are collected by the benchmark\n",
646650
" all_agents = {agent.name: agent for agent in agents_to_run} | agents_to_monitor\n",
647651
" adapters_to_monitor = {name: SmolAgentAdapter(agent, name) for name, agent in all_agents.items()}\n",
@@ -651,12 +655,12 @@
651655
" \"\"\"Create evaluators based on task's evaluation criteria.\"\"\"\n",
652656
" if not task.evaluation_data[\"evaluators\"]:\n",
653657
" return []\n",
654-
" \n",
658+
"\n",
655659
" evaluator_instances = []\n",
656660
" for name in task.evaluation_data[\"evaluators\"]:\n",
657661
" evaluator_class = getattr(evaluators, name)\n",
658662
" evaluator_instances.append(evaluator_class(task, environment, user))\n",
659-
" \n",
663+
"\n",
660664
" return evaluator_instances\n",
661665
"\n",
662666
" def run_agents(self, agents: Sequence[AgentAdapter], task: Task, environment: Environment) -> Sequence[Any]:\n",
@@ -741,7 +745,7 @@
741745
" fail_on_evaluation_error=True,\n",
742746
")\n",
743747
"\n",
744-
"results = benchmark.run(tasks=tasks)\n"
748+
"results = benchmark.run(tasks=tasks)"
745749
]
746750
},
747751
{
@@ -764,26 +768,26 @@
764768
"console = Console()\n",
765769
"\n",
766770
"for task in results[:2]:\n",
767-
" task_id = task['task_id']\n",
771+
" task_id = task[\"task_id\"]\n",
768772
" print(\"=\" * 60)\n",
769773
" print(f\"Results for Task ID: {task_id}\")\n",
770774
" print(\"=\" * 60)\n",
771-
" traces = task['traces']\n",
772-
" agent_traces = traces['agents']\n",
775+
" traces = task[\"traces\"]\n",
776+
" agent_traces = traces[\"agents\"]\n",
773777
" print(f\"Traces available for agents: {list(agent_traces.keys())}\")\n",
774778
" orchestrator_name = list(traces[\"agents\"].keys())[0]\n",
775779
" print(f\"Last 5 messages for '{orchestrator_name}'\")\n",
776780
" print(traces[\"agents\"].keys())\n",
777781
" messages = traces[\"agents\"][orchestrator_name][\"messages\"]\n",
778782
" for msg in messages[-5:]:\n",
779783
" role = msg.get(\"role\", \"unknown\")\n",
780-
" content = msg.get(\"content\", [])[0].get(\"text\", '')\n",
784+
" content = msg.get(\"content\", [])[0].get(\"text\", \"\")\n",
781785
" panel = Panel.fit(\n",
782786
" content,\n",
783787
" title=f\" {role} \",\n",
784788
" title_align=\"left\",\n",
785789
" )\n",
786-
" console.print(panel)\n"
790+
" console.print(panel)"
787791
]
788792
},
789793
{
@@ -795,13 +799,13 @@
795799
"source": [
796800
"# print results for first two tasks\n",
797801
"for task in results[:2]:\n",
798-
" task_id = task['task_id']\n",
802+
" task_id = task[\"task_id\"]\n",
799803
" print(\"=\" * 60)\n",
800804
" print(f\"Results for Task ID: {task_id}\")\n",
801805
" print(\"=\" * 60)\n",
802-
" eval_results = task['eval']\n",
806+
" eval_results = task[\"eval\"]\n",
803807
" for evals in eval_results:\n",
804-
" for k,v in evals.items():\n",
808+
" for k, v in evals.items():\n",
805809
" print(f\"{k:<35} {v}\")"
806810
]
807811
},

0 commit comments

Comments
 (0)