Skip to content

Commit ba6bdb3

Browse files
committed
updated more instances of seeding pattern
1 parent f160fd3 commit ba6bdb3

13 files changed

Lines changed: 114 additions & 92 deletions

File tree

docs/guides/seeding.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,39 @@ results = benchmark.run(tasks, agent_data=config)
3838

3939
This creates a `DefaultSeedGenerator` internally and passes it to all setup methods.
4040

41+
### Disabling Seeding
42+
43+
When you don't pass a `seed` argument (or explicitly pass `seed=None`), seeding is disabled:
44+
45+
```python
46+
# No seed = seeding disabled
47+
benchmark = MyBenchmark()
48+
49+
# Explicit None = also disabled
50+
benchmark = MyBenchmark(seed=None)
51+
```
52+
53+
When seeding is disabled:
54+
55+
1. A `DefaultSeedGenerator(global_seed=None)` is still created internally
56+
2. Setup methods still receive a `seed_generator` parameter (it's never `None`)
57+
3. `derive_seed()` returns `None` instead of an integer
58+
4. This `None` flows directly to model adapters (which accept `Optional[int]`)
59+
60+
This design simplifies setup method implementations—you don't need `if seed_generator is not None:` checks:
61+
62+
```python
63+
def setup_agents(self, agent_data, environment, task, user, seed_generator):
64+
# Always works - seed_generator is never None
65+
agent_gen = seed_generator.child("agents")
66+
67+
# Returns None if seeding disabled, int if enabled
68+
agent_seed = agent_gen.derive_seed("orchestrator")
69+
70+
# Model adapters accept Optional[int], so None works fine
71+
model = self.get_model_adapter(model_id, seed=agent_seed)
72+
```
73+
4174
### Using Seeds in Setup Methods
4275

4376
All setup methods receive a `seed_generator` parameter. Use it to derive seeds for your components. When seeding is disabled (no `seed` passed to the benchmark, or `seed=None`), `derive_seed()` returns `None`:

examples/five_a_day_benchmark/five_a_day_benchmark.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@
523523
"id": "70c66cd0",
524524
"metadata": {},
525525
"outputs": [],
526-
"source": "class FiveADayBenchmark(Benchmark):\n \"\"\"5-A-Day benchmark with multi-agent support.\"\"\"\n\n def setup_environment(self, agent_data: Dict[str, Any], task: Task, seed_generator: Optional[SeedGenerator] = None) -> Environment:\n \"\"\"Create environment from task data.\"\"\"\n task_data = {\n \"environment_data\": task.environment_data,\n \"query\": task.query,\n \"evaluation_data\": task.evaluation_data,\n \"metadata\": task.metadata,\n }\n\n environment = FiveADayEnvironment(task_data)\n\n # Register all tools for tracing\n for tool_name, tool_adapter in environment.get_tools().items():\n self.register(\"tools\", tool_name, tool_adapter)\n\n return environment\n\n def setup_agents(\n self,\n agent_data: Dict[str, Any],\n environment: Environment,\n task: Task,\n user=None,\n seed_generator: Optional[SeedGenerator] = None,\n ) -> tuple[list[SmolAgentAdapter], Dict[str, SmolAgentAdapter]]:\n \"\"\"Create multi-agent system with orchestrator and specialists.\n\n If seed_generator is provided, seeds are derived for each agent\n using the benchmark's seeding system with hierarchical paths.\n \"\"\"\n # Build seeds dict if seed_generator is available\n # Use child(\"agents\") to create logical paths like \"agents/primary_agent\"\n seeds = None\n if seed_generator is not None:\n agent_gen = seed_generator.child(\"agents\")\n seeds = {}\n for agent_spec in agent_data[\"agents\"]:\n seeds[agent_spec[\"agent_id\"]] = agent_gen.derive_seed(agent_spec[\"agent_id\"])\n\n agents_to_run, agents_to_monitor = build_agents(agent_data, environment, seeds)\n\n # Create adapters for the primary agent(s) to run\n adapters_to_run = [SmolAgentAdapter(agent, agent.name) for agent in agents_to_run]\n\n # This ensures all agent traces are collected by the benchmark\n all_agents = {agent.name: agent for agent in agents_to_run} | agents_to_monitor\n adapters_to_monitor = {name: SmolAgentAdapter(agent, name) for name, agent in all_agents.items()}\n return adapters_to_run, 
adapters_to_monitor\n\n def setup_evaluators(self, environment, task, agents, user, seed_generator: Optional[SeedGenerator] = None) -> Sequence[Evaluator]:\n \"\"\"Create evaluators based on task's evaluation criteria.\"\"\"\n if not task.evaluation_data[\"evaluators\"]:\n return []\n\n evaluator_instances = []\n for name in task.evaluation_data[\"evaluators\"]:\n evaluator_class = getattr(evaluators, name)\n evaluator_instances.append(evaluator_class(task, environment, user))\n\n return evaluator_instances\n\n def run_agents(self, agents: Sequence[AgentAdapter], task: Task, environment: Environment, query: str) -> Sequence[Any]:\n \"\"\"Execute agents and return their final answers.\"\"\"\n answers = [agent.run(query) for agent in agents]\n return answers\n\n def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:\n \"\"\"Return a model adapter for benchmark components that need LLM access.\n\n This benchmark doesn't use simulated tools, user simulators, or LLM judges,\n so this method is not called during execution.\n \"\"\"\n raise NotImplementedError(\"This benchmark doesn't use model adapters for tools/users/evaluators.\")\n\n def evaluate(\n self,\n evaluators: Sequence[Evaluator],\n agents: Dict[str, AgentAdapter],\n final_answer: Any,\n traces: Dict[str, Any],\n ) -> list[Dict[str, Any]]:\n \"\"\"Evaluate agent performance.\"\"\"\n results = []\n for evaluator in evaluators:\n filtered_traces = evaluator.filter_traces(traces)\n results.append(evaluator(filtered_traces, final_answer))\n return results"
526+
"source": "class FiveADayBenchmark(Benchmark):\n \"\"\"5-A-Day benchmark with multi-agent support.\"\"\"\n\n def setup_environment(self, agent_data: Dict[str, Any], task: Task, seed_generator: SeedGenerator) -> Environment:\n \"\"\"Create environment from task data.\"\"\"\n task_data = {\n \"environment_data\": task.environment_data,\n \"query\": task.query,\n \"evaluation_data\": task.evaluation_data,\n \"metadata\": task.metadata,\n }\n\n environment = FiveADayEnvironment(task_data)\n\n # Register all tools for tracing\n for tool_name, tool_adapter in environment.get_tools().items():\n self.register(\"tools\", tool_name, tool_adapter)\n\n return environment\n\n def setup_agents(\n self,\n agent_data: Dict[str, Any],\n environment: Environment,\n task: Task,\n user,\n seed_generator: SeedGenerator,\n ) -> tuple[list[SmolAgentAdapter], Dict[str, SmolAgentAdapter]]:\n \"\"\"Create multi-agent system with orchestrator and specialists.\n\n Seeds are derived for each agent using the benchmark's seeding system\n with hierarchical paths. 
derive_seed() returns None if seeding is disabled.\n \"\"\"\n # Build seeds dict using seed_generator\n # Use child(\"agents\") to create logical paths like \"agents/primary_agent\"\n agent_gen = seed_generator.child(\"agents\")\n seeds = {}\n for agent_spec in agent_data[\"agents\"]:\n seeds[agent_spec[\"agent_id\"]] = agent_gen.derive_seed(agent_spec[\"agent_id\"])\n\n agents_to_run, agents_to_monitor = build_agents(agent_data, environment, seeds)\n\n # Create adapters for the primary agent(s) to run\n adapters_to_run = [SmolAgentAdapter(agent, agent.name) for agent in agents_to_run]\n\n # This ensures all agent traces are collected by the benchmark\n all_agents = {agent.name: agent for agent in agents_to_run} | agents_to_monitor\n adapters_to_monitor = {name: SmolAgentAdapter(agent, name) for name, agent in all_agents.items()}\n return adapters_to_run, adapters_to_monitor\n\n def setup_evaluators(self, environment, task, agents, user, seed_generator: SeedGenerator) -> Sequence[Evaluator]:\n \"\"\"Create evaluators based on task's evaluation criteria.\"\"\"\n if not task.evaluation_data[\"evaluators\"]:\n return []\n\n evaluator_instances = []\n for name in task.evaluation_data[\"evaluators\"]:\n evaluator_class = getattr(evaluators, name)\n evaluator_instances.append(evaluator_class(task, environment, user))\n\n return evaluator_instances\n\n def run_agents(self, agents: Sequence[AgentAdapter], task: Task, environment: Environment, query: str) -> Sequence[Any]:\n \"\"\"Execute agents and return their final answers.\"\"\"\n answers = [agent.run(query) for agent in agents]\n return answers\n\n def get_model_adapter(self, model_id: str, **kwargs) -> ModelAdapter:\n \"\"\"Return a model adapter for benchmark components that need LLM access.\n\n This benchmark doesn't use simulated tools, user simulators, or LLM judges,\n so this method is not called during execution.\n \"\"\"\n raise NotImplementedError(\"This benchmark doesn't use model adapters for 
tools/users/evaluators.\")\n\n def evaluate(\n self,\n evaluators: Sequence[Evaluator],\n agents: Dict[str, AgentAdapter],\n final_answer: Any,\n traces: Dict[str, Any],\n ) -> list[Dict[str, Any]]:\n \"\"\"Evaluate agent performance.\"\"\"\n results = []\n for evaluator in evaluators:\n filtered_traces = evaluator.filter_traces(traces)\n results.append(evaluator(filtered_traces, final_answer))\n return results"
527527
},
528528
{
529529
"cell_type": "markdown",

examples/five_a_day_benchmark/five_a_day_benchmark.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ class FiveADayBenchmark(Benchmark):
729729
Supports single-agent and multi-agent (orchestrator+specialist) configurations.
730730
"""
731731

732-
def setup_environment(self, agent_data: Dict[str, Any], task: Task, seed_generator: Optional[SeedGenerator] = None) -> Environment:
732+
def setup_environment(self, agent_data: Dict[str, Any], task: Task, seed_generator: SeedGenerator) -> Environment:
733733
"""Create environment from task data."""
734734
# Pass full task data to environment
735735
task_data = {
@@ -753,8 +753,8 @@ def setup_agents(
753753
agent_data: Dict[str, Any],
754754
environment: Environment,
755755
task: Task,
756-
user=None,
757-
seed_generator: Optional[SeedGenerator] = None,
756+
user,
757+
seed_generator: SeedGenerator,
758758
) -> tuple[List[AgentAdapter], Dict[str, AgentAdapter]]:
759759
"""Create framework-specific agent with tools from environment.
760760
@@ -775,14 +775,13 @@ def setup_agents(
775775
primary_spec = next(a for a in agents_specs if a["agent_id"] == primary_agent_id)
776776
specialist_specs = [a for a in agents_specs if a["agent_id"] != primary_agent_id]
777777

778-
# Derive seeds for agents using seed_generator if available
778+
# Derive seeds for agents using seed_generator
779779
# Use child("agents") to create logical paths like "agents/primary_agent"
780-
seeds = None
781-
if seed_generator is not None:
782-
agent_gen = seed_generator.child("agents")
783-
seeds = {primary_spec["agent_id"]: agent_gen.derive_seed(primary_spec["agent_id"])}
784-
for spec in specialist_specs:
785-
seeds[spec["agent_id"]] = agent_gen.derive_seed(spec["agent_id"])
780+
# derive_seed() returns None if seeding is disabled
781+
agent_gen = seed_generator.child("agents")
782+
seeds = {primary_spec["agent_id"]: agent_gen.derive_seed(primary_spec["agent_id"])}
783+
for spec in specialist_specs:
784+
seeds[spec["agent_id"]] = agent_gen.derive_seed(spec["agent_id"])
786785

787786
# Build agent using unified interface - now returns (primary_adapter, all_adapters_dict)
788787
builder = get_agent_builder(framework, agent_type)
@@ -791,7 +790,7 @@ def setup_agents(
791790
# Return primary adapter to run, and all adapters for trace registration
792791
return [primary_adapter], all_adapters_dict
793792

794-
def setup_evaluators(self, environment, task, agents, user, seed_generator: Optional[SeedGenerator] = None) -> Sequence[Evaluator]:
793+
def setup_evaluators(self, environment, task, agents, user, seed_generator: SeedGenerator) -> Sequence[Evaluator]:
795794
"""Create evaluators based on task's evaluation_data.evaluators list."""
796795
if not task.evaluation_data["evaluators"]:
797796
return []

examples/macs_benchmark/macs_benchmark.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def setup_user(
179179
agent_data: Dict[str, Any],
180180
environment: Environment,
181181
task: Task,
182-
seed_generator: Optional[SeedGenerator] = None,
182+
seed_generator: SeedGenerator,
183183
) -> SmolagentsMACSUser:
184184
"""Create smolagents-compatible user simulator.
185185
@@ -210,7 +210,7 @@ def setup_agents(
210210
environment: MACSEnvironment, # type: ignore[override]
211211
task: Task,
212212
user: Optional[User],
213-
seed_generator: Optional[SeedGenerator] = None,
213+
seed_generator: SeedGenerator,
214214
) -> Tuple[List[AgentAdapter], Dict[str, AgentAdapter]]:
215215
"""Create smolagents multi-agent hierarchy.
216216
@@ -435,7 +435,7 @@ def setup_user(
435435
agent_data: Dict[str, Any],
436436
environment: Environment,
437437
task: Task,
438-
seed_generator: Optional[SeedGenerator] = None,
438+
seed_generator: SeedGenerator,
439439
) -> LangGraphMACSUser:
440440
"""Create langgraph-compatible user simulator.
441441
@@ -466,7 +466,7 @@ def setup_agents(
466466
environment: MACSEnvironment, # type: ignore[override]
467467
task: Task,
468468
user: Optional[User],
469-
seed_generator: Optional[SeedGenerator] = None,
469+
seed_generator: SeedGenerator,
470470
) -> Tuple[List[AgentAdapter], Dict[str, AgentAdapter]]:
471471
"""Create langgraph multi-agent hierarchy.
472472

examples/tau2_benchmark/tau2_benchmark.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def setup_user(
173173
agent_data: Dict[str, Any],
174174
environment: Tau2Environment,
175175
task: Task,
176-
seed_generator: Optional[SeedGenerator] = None,
176+
seed_generator: SeedGenerator,
177177
) -> DefaultTau2User:
178178
"""Create user simulator with tool support for default agent."""
179179
user_data = task.user_data
@@ -213,7 +213,7 @@ def setup_agents(
213213
environment: Tau2Environment,
214214
task: Task,
215215
user: Optional[DefaultTau2User],
216-
seed_generator: Optional[SeedGenerator] = None,
216+
seed_generator: SeedGenerator,
217217
):
218218
"""Create the default agent with user tool support."""
219219
agents_to_run, agents_dict = super().setup_agents(agent_data, environment, task, user, seed_generator)
@@ -248,7 +248,7 @@ def setup_user(
248248
agent_data: Dict[str, Any],
249249
environment: Tau2Environment,
250250
task: Task,
251-
seed_generator: Optional[SeedGenerator] = None,
251+
seed_generator: SeedGenerator,
252252
) -> DefaultTau2User:
253253
"""Create user simulator with tool support for default agent."""
254254
user_data = task.user_data
@@ -288,7 +288,7 @@ def setup_agents(
288288
environment: Tau2Environment,
289289
task: Task,
290290
user: Optional[DefaultTau2User],
291-
seed_generator: Optional[SeedGenerator] = None,
291+
seed_generator: SeedGenerator,
292292
):
293293
"""Create the default agent with user tool support."""
294294
agents_to_run, agents_dict = super().setup_agents(agent_data, environment, task, user, seed_generator)
@@ -372,7 +372,7 @@ def setup_user(
372372
agent_data: Dict[str, Any],
373373
environment: Tau2Environment,
374374
task: Task,
375-
seed_generator: Optional[SeedGenerator] = None,
375+
seed_generator: SeedGenerator,
376376
) -> SmolagentsTau2User:
377377
"""Create smolagents-compatible user simulator."""
378378
user_data = task.user_data
@@ -411,7 +411,7 @@ def setup_agents(
411411
environment: Tau2Environment,
412412
task: Task,
413413
user: Optional[User],
414-
seed_generator: Optional[SeedGenerator] = None,
414+
seed_generator: SeedGenerator,
415415
) -> Tuple[List[AgentAdapter], Dict[str, AgentAdapter]]:
416416
"""Create smolagents customer service agent."""
417417
model_id = agent_data.get("model_id", "gemini-2.5-flash")
@@ -546,7 +546,7 @@ def setup_user(
546546
agent_data: Dict[str, Any],
547547
environment: Tau2Environment,
548548
task: Task,
549-
seed_generator: Optional[SeedGenerator] = None,
549+
seed_generator: SeedGenerator,
550550
) -> LangGraphTau2User:
551551
"""Create langgraph-compatible user simulator."""
552552
user_data = task.user_data
@@ -585,7 +585,7 @@ def setup_agents(
585585
environment: Tau2Environment,
586586
task: Task,
587587
user: Optional[User],
588-
seed_generator: Optional[SeedGenerator] = None,
588+
seed_generator: SeedGenerator,
589589
) -> Tuple[List[AgentAdapter], Dict[str, AgentAdapter]]:
590590
"""Create langgraph customer service agent."""
591591
model_id = agent_data.get("model_id", "gemini-2.5-flash")

0 commit comments

Comments
 (0)