parameterlab
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/reference/task.md b/docs/reference/task.md
Lines changed: 2 additions & 2 deletions
diff --git a/examples/five_a_day_benchmark/five_a_day_benchmark.ipynb b/examples/five_a_day_benchmark/five_a_day_benchmark.ipynb
Lines changed: 4 additions & 4 deletions
diff --git a/examples/five_a_day_benchmark/five_a_day_benchmark.py b/examples/five_a_day_benchmark/five_a_day_benchmark.py
Lines changed: 4 additions & 4 deletions
diff --git a/examples/introduction/tutorial.ipynb b/examples/introduction/tutorial.ipynb
Lines changed: 8 additions & 8 deletions
diff --git a/maseval/__init__.py b/maseval/__init__.py
Lines changed: 15 additions & 6 deletions
diff --git a/maseval/benchmark/macs/data_loader.py b/maseval/benchmark/macs/data_loader.py
Lines changed: 8 additions & 8 deletions
@@ -117,7 +117,7 @@ Once implemented, run your benchmark:
 
 ```python
 # Define your tasks
-tasks = TaskCollection([Task(query="...", expected="..."), ...])
+tasks = TaskQueue([Task(query="..."), ...])
 
 # Configure your agents (e.g., model parameters, tool settings)
 agent_config = {"model": "gpt-4", "temperature": 0.7}
 
@@ -1,9 +1,9 @@
 # Task
 
-Tasks define individual benchmark scenarios including inputs, expected outputs, and any metadata needed for evaluation. TaskCollections group related tasks together.
+Tasks define individual benchmark scenarios including inputs, expected outputs, and any metadata needed for evaluation. TaskQueues group related tasks together.
 
 [:material-github: View source](https://github.com/parameterlab/maseval/blob/main/maseval/core/task.py){ .md-source-file }
 
 ::: maseval.core.task.Task
 
-::: maseval.core.task.TaskCollection
+::: maseval.core.task.TaskQueue
@@ -124,7 +124,7 @@
     "from smolagents import ToolCallingAgent, LiteLLMModel, FinalAnswerTool\n",
     "\n",
     "# MASEval core components\n",
-    "from maseval import Benchmark, Environment, Task, TaskCollection, AgentAdapter, Evaluator, ModelAdapter\n",
+    "from maseval import Benchmark, Environment, Task, TaskQueue, AgentAdapter, Evaluator, ModelAdapter\n",
     "from maseval.interface.agents.smolagents import SmolAgentAdapter\n",
     "\n",
     "# Import evaluators module (dynamically loaded later)\n",
@@ -139,7 +139,7 @@
     "    limit: int | None = None,\n",
     "    seed: int | None = None,\n",
     "    task_indices: list[int] | None = None,\n",
-    ") -> tuple[TaskCollection, list[Dict[str, Any]]]:\n",
+    ") -> tuple[TaskQueue, list[Dict[str, Any]]]:\n",
     "    \"\"\"Load tasks and agent configurations.\n",
     "\n",
     "    Args:\n",
@@ -152,7 +152,7 @@
     "        task_indices: Optional list of task indices to load (e.g., [0, 2, 4])\n",
     "\n",
     "    Returns:\n",
-    "        Tuple of (TaskCollection, list of agent configs)\n",
+    "        Tuple of (TaskQueue, list of agent configs)\n",
     "    \"\"\"\n",
     "    data_dir = Path(\"examples/five_a_day_benchmark/data\")\n",
     "\n",
@@ -199,7 +199,7 @@
     "\n",
     "        configs_data.append(config)\n",
     "\n",
-    "    return TaskCollection(tasks_data), configs_data"
+    "    return TaskQueue(tasks_data), configs_data"
    ]
   },
   {
 
@@ -26,7 +26,7 @@
 
 from utils import derive_seed, sanitize_name  # type: ignore[unresolved-import]
 
-from maseval import Benchmark, Environment, Evaluator, Task, TaskCollection, AgentAdapter, ModelAdapter
+from maseval import Benchmark, Environment, Evaluator, Task, TaskQueue, AgentAdapter, ModelAdapter
 from maseval.core.callbacks.result_logger import FileResultLogger
 
 # Import tool implementations
@@ -825,7 +825,7 @@ def load_benchmark_data(
     limit: Optional[int] = None,
     specific_task: Optional[int] = None,
     seed: Optional[int] = None,
-) -> tuple[TaskCollection, List[Dict[str, Any]]]:
+) -> tuple[TaskQueue, List[Dict[str, Any]]]:
     """Load tasks and agent configurations with validation.
 
     Args:
@@ -838,7 +838,7 @@ def load_benchmark_data(
         seed: Base random seed for reproducibility (None for non-deterministic)
 
     Returns:
-        Tuple of (TaskCollection, agent_configs_list)
+        Tuple of (TaskQueue, agent_configs_list)
     """
     if limit is not None and specific_task is not None:
         raise ValueError("Cannot specify both limit and specific_task")
@@ -896,7 +896,7 @@ def load_benchmark_data(
 
     print(f"Loaded {len(tasks_data)} tasks and {len(configs_data)} agent configs\n")
 
-    return TaskCollection(tasks_data), configs_data
+    return TaskQueue(tasks_data), configs_data
 
 
 # ============================================================================
 
@@ -330,7 +330,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from maseval import Benchmark, Environment, Evaluator, Task, TaskCollection\n",
+    "from maseval import Benchmark, Environment, Evaluator, Task, TaskQueue\n",
     "from maseval.interface.agents.smolagents import SmolAgentAdapter\n",
     "\n",
     "print(\"MASEval components imported successfully!\")"
@@ -634,13 +634,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Create benchmark instance with agent configuration\n",
-    "agent_data = {\"model_id\": \"gemini/gemini-2.5-flash\", \"temperature\": 0.7}\n",
-    "\n",
-    "benchmark = SimpleBenchmark(agent_data=agent_data, progress_bar=False)\n",
-    "\n",
-    "# Create task collection\n",
-    "tasks = TaskCollection([task])\n",
+    "# Create benchmark instance with agent configuration\n",
+    "agent_data = {\"model_id\": \"gemini/gemini-2.5-flash\", \"temperature\": 0.7}\n",
+    "\n",
+    "benchmark = SimpleBenchmark(agent_data=agent_data, progress_bar=False)\n",
+    "\n",
+    "# Create task queue\n",
+    "tasks = TaskQueue([task])\n",
     "\n",
     "# Run the benchmark\n",
     "print(\"Running benchmark...\\n\")\n",
 
@@ -8,7 +8,17 @@
 Benchmarks sit in the `maseval.benchmark` submodule.
 """
 
-from .core.task import Task, TaskCollection, TaskProtocol, TimeoutAction
+from .core.task import (
+    Task,
+    TaskProtocol,
+    TimeoutAction,
+    # Task queue classes
+    BaseTaskQueue,
+    TaskQueue,
+    SequentialTaskQueue,
+    PriorityTaskQueue,
+    AdaptiveTaskQueue,
+)
 from .core.environment import Environment
 from .core.agent import AgentAdapter
 from .core.benchmark import Benchmark, TaskExecutionStatus
@@ -29,7 +39,6 @@
 from .core.tracing import TraceableMixin
 from .core.registry import ComponentRegistry
 from .core.context import TaskContext
-from .core.queue import TaskQueue, SequentialQueue, PriorityQueue, AdaptiveQueue
 from .core.exceptions import (
     MASEvalError,
     AgentError,
@@ -45,7 +54,6 @@
 __all__ = [
     # Tasks
     "Task",
-    "TaskCollection",
     "TaskProtocol",
     "TimeoutAction",
     # Core abstractions
@@ -79,10 +87,11 @@
     "ComponentRegistry",
     "TaskContext",
     # Task queues
+    "BaseTaskQueue",
     "TaskQueue",
-    "SequentialQueue",
-    "PriorityQueue",
-    "AdaptiveQueue",
+    "SequentialTaskQueue",
+    "PriorityTaskQueue",
+    "AdaptiveTaskQueue",
     # Exceptions and validation
     "MASEvalError",
     "AgentError",
 
@@ -15,7 +15,7 @@
 from urllib.error import HTTPError, URLError
 from urllib.request import urlopen
 
-from maseval import Task, TaskCollection
+from maseval import Task, TaskQueue
 
 
 # =============================================================================
@@ -422,7 +422,7 @@ def load_tasks(
     domain: str,
     data_dir: Optional[Path] = None,
     limit: Optional[int] = None,
-) -> TaskCollection:
+) -> TaskQueue:
     """Load tasks for a MACS domain.
 
     Args:
@@ -432,7 +432,7 @@ def load_tasks(
         limit: Maximum number of tasks to load
 
     Returns:
-        TaskCollection containing Task objects
+        TaskQueue containing Task objects
 
     Raises:
         ValueError: If domain is not valid
@@ -465,7 +465,7 @@ def load_tasks(
             )
         )
 
-    return TaskCollection(tasks)
+    return TaskQueue(tasks)
 
 
 def load_agent_config(
@@ -503,12 +503,12 @@ def load_agent_config(
 
 
 def configure_model_ids(
-    tasks: Union[TaskCollection, List[Task]],
+    tasks: Union[TaskQueue, List[Task]],
     *,
     tool_model_id: Optional[str] = None,
     user_model_id: Optional[str] = None,
     evaluator_model_id: Optional[str] = None,
-) -> Union[TaskCollection, List[Task]]:
+) -> Union[TaskQueue, List[Task]]:
     """Configure model IDs for benchmark components in task data.
 
     This helper merges runtime model configuration into task data structures,
@@ -519,13 +519,13 @@ def configure_model_ids(
     task-specific overrides in the original data to take precedence.
 
     Args:
-        tasks: TaskCollection or list of Tasks to configure
+        tasks: TaskQueue or list of Tasks to configure
         tool_model_id: Model ID for tool simulators (stored in environment_data)
         user_model_id: Model ID for user simulator (stored in user_data)
         evaluator_model_id: Model ID for evaluators (stored in evaluation_data)
 
     Returns:
-        The same collection (mutated in place for convenience)
+        The same queue or list (mutated in place for convenience)
 
     Example:
         ```python