44from browsergym .workarena import get_all_tasks_agents
55from browsergym .workarena .instance import SNowInstance
66from pydantic import ConfigDict
7- from ray .cloudpickle import instance
87
98from agentlab .actions import ToolsActionSet
109from agentlab .backends .browser .base import BrowserBackend
@@ -22,6 +21,7 @@ class WorkArenaBenchmark(AbstractBenchmark):
2221 backend_cls : type [BrowserBackend ]
2322 name : str = "workarena"
2423 level : str = "l1"
24+ n_seeds : int = 1
2525 env_args_list : list [BrowserEnvArgs ] = None # type: ignore
2626 dataset : list [WorkarenaTask ] = None # type: ignore
2727 is_multi_tab : bool = False
@@ -33,16 +33,15 @@ def model_post_init(self, __context: Any) -> None:
3333 self ._snow_instance = SNowInstance ()
3434 self .env_args_list = []
3535 if self .dataset is None :
36- task_seed_tuples = get_all_tasks_agents (filter = self .level )
37- self .dataset = self .load_tasks (task_seed_tuples , self .level )
36+ self .dataset = self .load_tasks (self .level )
3837 for task in self .dataset :
3938 env_args = BrowserEnvArgs (task = task , backend_cls = self .backend_cls )
4039 self .env_args_list .append (env_args )
4140 logger .info (f"Loaded { len (self .env_args_list )} workarena tasks" )
4241
43- def load_tasks (self , task_seed_tuples : list [tuple [type , int ]], level : str ) -> list [WorkarenaTask ]:
42+ def load_tasks (self , level : str ) -> list [WorkarenaTask ]:
43+ task_seed_tuples = get_all_tasks_agents (filter = self .level , n_seed_l1 = self .n_seeds )
4444 tasks = []
45-
4645 for task_cls , seed in task_seed_tuples :
4746 task = WorkarenaTask (
4847 url = "" ,
@@ -53,4 +52,5 @@ def load_tasks(self, task_seed_tuples: list[tuple[type, int]], level: str) -> li
5352 seed = seed ,
5453 )
5554 tasks .append (task )
55+ logger .info (f"Loaded { len (tasks )} tasks for level { level } " )
5656 return tasks
0 commit comments