parameterlab
diff --git a/‎examples/five_a_day_benchmark/five_a_day_benchmark.ipynb‎
Lines changed: 58 additions & 54 deletions b/‎examples/five_a_day_benchmark/five_a_day_benchmark.ipynb‎
Lines changed: 58 additions & 54 deletions
@@ -141,7 +141,7 @@
     "    task_indices: list[int] | None = None,\n",
     ") -> tuple[TaskCollection, list[Dict[str, Any]]]:\n",
     "    \"\"\"Load tasks and agent configurations.\n",
-    "    \n",
+    "\n",
     "    Args:\n",
     "        config_type: 'single' or 'multi' agent configuration\n",
     "        framework: Agent framework to use\n",
@@ -150,34 +150,34 @@
     "        limit: Optional limit on number of tasks (None = all 5)\n",
     "        seed: Random seed for reproducibility\n",
     "        task_indices: Optional list of task indices to load (e.g., [0, 2, 4])\n",
-    "    \n",
+    "\n",
     "    Returns:\n",
     "        Tuple of (TaskCollection, list of agent configs)\n",
     "    \"\"\"\n",
     "    data_dir = Path(\"examples/five_a_day_benchmark/data\")\n",
-    "    \n",
+    "\n",
     "    with open(data_dir / \"tasks.json\", \"r\") as f:\n",
     "        tasks_raw = json.load(f)\n",
     "    with open(data_dir / f\"{config_type}agent.json\", \"r\") as f:\n",
     "        configs_raw = json.load(f)\n",
-    "    \n",
+    "\n",
     "    # Apply limit first\n",
     "    if limit:\n",
     "        tasks_raw = tasks_raw[:limit]\n",
     "        configs_raw = configs_raw[:limit]\n",
-    "    \n",
+    "\n",
     "    # Then apply task_indices filter if specified\n",
     "    if task_indices is not None:\n",
     "        tasks_raw = [tasks_raw[i] for i in task_indices if i < len(tasks_raw)]\n",
     "        configs_raw = [configs_raw[i] for i in task_indices if i < len(configs_raw)]\n",
-    "    \n",
+    "\n",
     "    tasks_data = []\n",
     "    configs_data = []\n",
-    "    \n",
+    "\n",
     "    for task_dict, config in zip(tasks_raw, configs_raw):\n",
     "        task_id = task_dict[\"metadata\"][\"task_id\"]\n",
     "        task_dict[\"environment_data\"][\"agent_framework\"] = framework\n",
-    "        \n",
+    "\n",
     "        # Create Task object\n",
     "        tasks_data.append(\n",
     "            Task(\n",
@@ -187,18 +187,18 @@
     "                metadata=task_dict[\"metadata\"],\n",
     "            )\n",
     "        )\n",
-    "        \n",
+    "\n",
     "        # Enrich config with framework and model info\n",
     "        config[\"framework\"] = framework\n",
     "        config[\"model_config\"] = {\"model_id\": model_id, \"temperature\": temperature}\n",
-    "        \n",
+    "\n",
     "        # Derive seeds for reproducibility\n",
     "        if seed is not None:\n",
     "            for agent_spec in config[\"agents\"]:\n",
     "                agent_spec[\"seed\"] = derive_seed(seed, task_id, agent_spec[\"agent_id\"])\n",
-    "        \n",
+    "\n",
     "        configs_data.append(config)\n",
-    "    \n",
+    "\n",
     "    return TaskCollection(tasks_data), configs_data"
    ]
   },
@@ -224,14 +224,15 @@
     "# Tell litellm to drop unsupported params (like 'seed' for Gemini)\n",
     "litellm.drop_params = True\n",
     "\n",
+    "\n",
     "def get_model(model_id: str, temperature: float = 0.7, seed: int | None = None):\n",
     "    \"\"\"Create a model instance compatible with smolagents.\n",
-    "    \n",
+    "\n",
     "    Args:\n",
     "        model_id: Model name (e.g., 'gemini-2.5-flash', 'gpt-4')\n",
     "        temperature: Randomness (0.0 = deterministic, 1.0 = creative)\n",
     "        seed: Random seed for reproducible outputs (ignored for models that don't support it)\n",
-    "    \n",
+    "\n",
     "    Returns:\n",
     "        LiteLLMModel configured for smolagents\n",
     "    \"\"\"\n",
@@ -242,6 +243,7 @@
     "        seed=seed,  # Will be dropped by litellm for providers that don't support it\n",
     "    )\n",
     "\n",
+    "\n",
     "# Test the model factory\n",
     "model = get_model(\"gemini-2.5-flash\", temperature=0.7, seed=42)\n",
     "print(f\"Created model: {model.model_id}\")"
@@ -275,7 +277,7 @@
     "\n",
     "# Extract the first (and only) task and config\n",
     "task_0: Task = task_data[0]\n",
-    "config_0: Dict[str,Any] = agent_configs[0]\n",
+    "config_0: Dict[str, Any] = agent_configs[0]\n",
     "\n",
     "print(\"=\" * 60)\n",
     "print(\"TASK 0: Email & Banking\")\n",
@@ -309,7 +311,7 @@
     "print(f\"Agent Type: {config_0['agent_type']}\")\n",
     "print(f\"Primary Agent: {config_0['primary_agent_id']}\\n\")\n",
     "\n",
-    "for i, agent_spec in enumerate(config_0['agents'], 1):\n",
+    "for i, agent_spec in enumerate(config_0[\"agents\"], 1):\n",
     "    print(f\"{i}. {agent_spec['agent_name']} (ID: {agent_spec['agent_id']})\")\n",
     "    print(f\"   Tools: {agent_spec['tools'] if agent_spec['tools'] else 'None (delegates only)'}\")\n",
     "    print(f\"   Role: {agent_spec['agent_instruction'][:80]}...\")\n",
@@ -384,23 +386,23 @@
     "    specialist_agents = []\n",
     "\n",
     "    temperature = agent_data[\"model_config\"][\"temperature\"]\n",
-    "    \n",
+    "\n",
     "    primary_agent_id = agent_data[\"primary_agent_id\"]\n",
     "    agents_specs = agent_data[\"agents\"]\n",
     "    all_tool_adapters = environment.get_tools()\n",
-    "    \n",
+    "\n",
     "    # Build specialists first\n",
     "    specialist_agents = []\n",
     "    for agent_spec in agents_specs:\n",
     "        if agent_spec[\"agent_id\"] == primary_agent_id:\n",
     "            continue\n",
-    "            \n",
+    "\n",
     "        seed = agent_spec.get(\"seed\")\n",
     "        model = get_model(model_id, temperature, seed)\n",
     "        spec_tool_adapters = filter_tool_adapters_by_prefix(all_tool_adapters, agent_spec[\"tools\"])\n",
     "        spec_tools = [adapter.tool for adapter in spec_tool_adapters]\n",
     "        spec_tools.append(FinalAnswerTool())\n",
-    "        \n",
+    "\n",
     "        specialist = ToolCallingAgent(\n",
     "            model=model,\n",
     "            tools=spec_tools,\n",
@@ -410,12 +412,12 @@
     "            verbosity_level=0,\n",
     "        )\n",
     "        specialist_agents.append(specialist)\n",
-    "    \n",
+    "\n",
     "    # Build orchestrator\n",
     "    primary_spec = next(a for a in agents_specs if a[\"agent_id\"] == primary_agent_id)\n",
     "    primary_seed = primary_spec.get(\"seed\")\n",
     "    primary_model = get_model(model_id, temperature, primary_seed)\n",
-    "    \n",
+    "\n",
     "    orchestrator = ToolCallingAgent(\n",
     "        model=primary_model,\n",
     "        tools=[FinalAnswerTool()],\n",
@@ -425,7 +427,7 @@
     "        verbosity_level=0,\n",
     "    )\n",
     "\n",
-    "    return [orchestrator], {agent.name: agent for agent in specialist_agents}\n"
+    "    return [orchestrator], {agent.name: agent for agent in specialist_agents}"
    ]
   },
   {
@@ -475,17 +477,17 @@
     "        \"\"\"Initialize environment state from task data.\"\"\"\n",
     "        env_data = task_data[\"environment_data\"].copy()\n",
     "        tool_names = env_data.get(\"tools\", [])\n",
-    "        \n",
+    "\n",
     "        # Create state objects (e.g., email inboxes, bank accounts)\n",
     "        states = get_states(tool_names, env_data)\n",
     "        env_data.update(states)\n",
-    "        \n",
+    "\n",
     "        return env_data\n",
     "\n",
     "    def create_tools(self) -> list:\n",
     "        \"\"\"Create and convert tools to framework-specific format.\"\"\"\n",
     "        tools_list = []\n",
-    "        \n",
+    "\n",
     "        # Map tool names to their collection classes\n",
     "        tool_mapping = {\n",
     "            \"email\": (EmailToolCollection, lambda: (self.state[\"email_state\"],)),\n",
@@ -499,17 +501,17 @@
     "            \"my_calendar_mcp\": (MCPCalendarToolCollection, lambda: (self.state[\"my_calendar_mcp_state\"],)),\n",
     "            \"other_calendar_mcp\": (MCPCalendarToolCollection, lambda: (self.state[\"other_calendar_mcp_state\"],)),\n",
     "        }\n",
-    "        \n",
+    "\n",
     "        for tool_name in self.state[\"tools\"]:\n",
     "            if tool_name in tool_mapping:\n",
     "                ToolClass, get_init_args = tool_mapping[tool_name]\n",
     "                tool_instance = ToolClass(*get_init_args())\n",
-    "                \n",
+    "\n",
     "                # Get base tools and convert to framework format\n",
     "                for base_tool in tool_instance.get_sub_tools():\n",
     "                    framework_tool = base_tool.to_smolagents()\n",
     "                    tools_list.append(framework_tool)\n",
-    "        \n",
+    "\n",
     "        return tools_list"
    ]
   },
@@ -534,7 +536,7 @@
    "source": [
     "print(f\"{config_0['task_description']}\")\n",
     "\n",
-    "for i, agent_spec in enumerate(config_0['agents'], 1):\n",
+    "for i, agent_spec in enumerate(config_0[\"agents\"], 1):\n",
     "    print(f\"{i}. {agent_spec['agent_name']} (ID: {agent_spec['agent_id']})\")\n",
     "    print(f\"   Tools: {agent_spec['tools'] if agent_spec['tools'] else 'None (delegates only)'}\")\n",
     "    print(f\"   Role: {agent_spec['agent_instruction'][:80]}...\")\n",
@@ -560,18 +562,20 @@
     "# Note: model_config is already set by load_benchmark_data()\n",
     "\n",
     "# Create environment from task data\n",
-    "environment_0 = FiveADayEnvironment({\n",
-    "    \"environment_data\": task_0.environment_data,\n",
-    "    \"query\": task_0.query,\n",
-    "    \"evaluation_data\": task_0.evaluation_data,\n",
-    "    \"metadata\": task_0.metadata,\n",
-    "})\n",
+    "environment_0 = FiveADayEnvironment(\n",
+    "    {\n",
+    "        \"environment_data\": task_0.environment_data,\n",
+    "        \"query\": task_0.query,\n",
+    "        \"evaluation_data\": task_0.evaluation_data,\n",
+    "        \"metadata\": task_0.metadata,\n",
+    "    }\n",
+    ")\n",
     "\n",
     "# Build agents using the build_agents function\n",
     "agents_to_run, agents_to_monitor = build_agents(config_0, environment_0)\n",
     "\n",
     "print(f\"\\nBuilt Agents for Task: {task_0.metadata['task_id']}\")\n",
-    "print(f\"{'='*60}\")\n",
+    "print(f\"{'=' * 60}\")\n",
     "print(f\"\\nAgents to run: {[agent.name for agent in agents_to_run]}\")\n",
     "print(f\"Agents to monitor: {list(agents_to_monitor.keys())}\")\n",
     "\n",
@@ -580,7 +584,7 @@
     "    print(f\"\\n  Agent: {agent.name}\")\n",
     "    # smolagents stores tools as a dict with string keys\n",
     "    print(f\"    Tools: {list(agent.tools.keys())}\")\n",
-    "    if hasattr(agent, 'managed_agents') and agent.managed_agents:\n",
+    "    if hasattr(agent, \"managed_agents\") and agent.managed_agents:\n",
     "        # managed_agents is also a dict with string keys\n",
     "        print(f\"    Managed agents: {list(agent.managed_agents.keys())}\")\n",
     "        for agent_name, managed in agent.managed_agents.items():\n",
@@ -623,25 +627,25 @@
     "            \"evaluation_data\": task.evaluation_data,\n",
     "            \"metadata\": task.metadata,\n",
     "        }\n",
-    "        \n",
+    "\n",
     "        environment = FiveADayEnvironment(task_data)\n",
-    "        \n",
+    "\n",
     "        # Register all tools for tracing\n",
     "        for tool_adapter in environment.get_tools():\n",
     "            tool_name = getattr(tool_adapter, \"name\", str(type(tool_adapter).__name__))\n",
     "            self.register(\"tools\", tool_name, tool_adapter)\n",
-    "        \n",
+    "\n",
     "        return environment\n",
     "\n",
     "    def setup_agents(\n",
     "        self, agent_data: Dict[str, Any], environment: Environment, task: Task, user=None\n",
     "    ) -> tuple[list[SmolAgentAdapter], Dict[str, SmolAgentAdapter]]:\n",
     "        \"\"\"Create multi-agent system with orchestrator and specialists.\"\"\"\n",
     "        agents_to_run, agents_to_monitor = build_agents(agent_data, environment)\n",
-    "        \n",
+    "\n",
     "        # Create adapters for the primary agent(s) to run\n",
     "        adapters_to_run = [SmolAgentAdapter(agent, agent.name) for agent in agents_to_run]\n",
-    "        \n",
+    "\n",
     "        # This ensures all agent traces are collected by the benchmark\n",
     "        all_agents = {agent.name: agent for agent in agents_to_run} | agents_to_monitor\n",
     "        adapters_to_monitor = {name: SmolAgentAdapter(agent, name) for name, agent in all_agents.items()}\n",
@@ -651,12 +655,12 @@
     "        \"\"\"Create evaluators based on task's evaluation criteria.\"\"\"\n",
     "        if not task.evaluation_data[\"evaluators\"]:\n",
     "            return []\n",
-    "        \n",
+    "\n",
     "        evaluator_instances = []\n",
     "        for name in task.evaluation_data[\"evaluators\"]:\n",
     "            evaluator_class = getattr(evaluators, name)\n",
     "            evaluator_instances.append(evaluator_class(task, environment, user))\n",
-    "        \n",
+    "\n",
     "        return evaluator_instances\n",
     "\n",
     "    def run_agents(self, agents: Sequence[AgentAdapter], task: Task, environment: Environment) -> Sequence[Any]:\n",
@@ -741,7 +745,7 @@
     "    fail_on_evaluation_error=True,\n",
     ")\n",
     "\n",
-    "results = benchmark.run(tasks=tasks)\n"
+    "results = benchmark.run(tasks=tasks)"
    ]
   },
   {
@@ -764,26 +768,26 @@
     "console = Console()\n",
     "\n",
     "for task in results[:2]:\n",
-    "    task_id = task['task_id']\n",
+    "    task_id = task[\"task_id\"]\n",
     "    print(\"=\" * 60)\n",
     "    print(f\"Results for Task ID: {task_id}\")\n",
     "    print(\"=\" * 60)\n",
-    "    traces = task['traces']\n",
-    "    agent_traces = traces['agents']\n",
+    "    traces = task[\"traces\"]\n",
+    "    agent_traces = traces[\"agents\"]\n",
     "    print(f\"Traces available for agents: {list(agent_traces.keys())}\")\n",
     "    orchestrator_name = list(traces[\"agents\"].keys())[0]\n",
     "    print(f\"Last 5 messages for '{orchestrator_name}'\")\n",
     "    print(traces[\"agents\"].keys())\n",
     "    messages = traces[\"agents\"][orchestrator_name][\"messages\"]\n",
     "    for msg in messages[-5:]:\n",
     "        role = msg.get(\"role\", \"unknown\")\n",
-    "        content = msg.get(\"content\", [])[0].get(\"text\", '')\n",
+    "        content = msg.get(\"content\", [])[0].get(\"text\", \"\")\n",
     "        panel = Panel.fit(\n",
     "            content,\n",
     "            title=f\" {role} \",\n",
     "            title_align=\"left\",\n",
     "        )\n",
-    "        console.print(panel)\n"
+    "        console.print(panel)"
    ]
   },
   {
@@ -795,13 +799,13 @@
    "source": [
     "# print results for first two tasks\n",
     "for task in results[:2]:\n",
-    "    task_id = task['task_id']\n",
+    "    task_id = task[\"task_id\"]\n",
     "    print(\"=\" * 60)\n",
     "    print(f\"Results for Task ID: {task_id}\")\n",
     "    print(\"=\" * 60)\n",
-    "    eval_results = task['eval']\n",
+    "    eval_results = task[\"eval\"]\n",
     "    for evals in eval_results:\n",
-    "        for k,v in evals.items():\n",
+    "        for k, v in evals.items():\n",
     "            print(f\"{k:<35} {v}\")"
    ]
   },