Fix optional/lazy imports and speed up pip install by trimming core dependencies

xzrderek · xzrderek · commit 78d0cdb744f5 · 2025-10-02T01:50:02.000-07:00
diff --git a/eval_protocol/__init__.py b/eval_protocol/__init__.py
@@ -23,15 +23,6 @@
     test_mcp,
 )
 from .data_loader import DynamicDataLoader, InlineDataLoader
-
-# Try to import FireworksPolicy if available
-try:
-    from .mcp_env import FireworksPolicy
-
-    _FIREWORKS_AVAILABLE = True
-except (ImportError, AttributeError):
-    _FIREWORKS_AVAILABLE = False
-# Import submodules to make them available via eval_protocol.rewards, etc.
 from . import mcp, rewards
 from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata
 from .playback_policy import PlaybackPolicyBase
@@ -42,6 +33,13 @@
 from .pytest import evaluation_test, SingleTurnRolloutProcessor, RemoteRolloutProcessor
 from .pytest.parameterize import DefaultParameterIdGenerator
 
+from .types.remote_rollout_processor import (
+    InitRequest,
+    RolloutMetadata,
+    StatusResponse,
+    create_langfuse_config_tags,
+)
+
 try:
     from .adapters import OpenAIResponsesAdapter
 except ImportError:
@@ -62,14 +60,6 @@
 except ImportError:
     LangSmithAdapter = None
 
-# Remote server types
-from .types.remote_rollout_processor import (
-    InitRequest,
-    RolloutMetadata,
-    StatusResponse,
-    create_langfuse_config_tags,
-)
-
 warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
 
 __all__ = [
diff --git a/eval_protocol/adapters/huggingface.py b/eval_protocol/adapters/huggingface.py
@@ -13,12 +13,9 @@
 logger = logging.getLogger(__name__)
 
 try:
-    from datasets import Dataset, DatasetDict, load_dataset
-
-    DATASETS_AVAILABLE = True
+    from datasets import Dataset, DatasetDict, load_dataset  # pyright: ignore[reportAttributeAccessIssue]
 except ImportError:
-    DATASETS_AVAILABLE = False
-    logger.warning("HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'")
+    raise ImportError("HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'")
 
 # Type alias for transformation function
 TransformFunction = Callable[[Dict[str, Any]], Dict[str, Any]]
@@ -80,11 +77,6 @@ def __init__(
             revision: Optional dataset revision/commit hash
             **load_dataset_kwargs: Additional arguments to pass to load_dataset
         """
-        if not DATASETS_AVAILABLE:
-            raise ImportError(
-                "HuggingFace datasets not installed. Install with: pip install 'eval-protocol[huggingface]'"
-            )
-
         self.dataset_id = dataset_id
         self.transform_fn = transform_fn
         self.config_name = config_name
diff --git a/eval_protocol/execution/pipeline.py b/eval_protocol/execution/pipeline.py
@@ -12,7 +12,6 @@
 
 import aiohttp
 import hydra
-from datasets import Dataset, DatasetDict
 from hydra.errors import InstantiationException
 from omegaconf import DictConfig, OmegaConf
 
@@ -24,6 +23,14 @@
 from eval_protocol.utils.module_loader import load_function as load_reward_function
 from eval_protocol.utils.packaging_utils import install_requirements
 
+try:
+    from datasets import Dataset, DatasetDict  # pyright: ignore[reportAttributeAccessIssue]
+except ImportError:
+    raise ImportError(
+        "The 'datasets' package is required to use this function. "
+        "Please install it with 'pip install \"eval-protocol[huggingface]\"'"
+    )
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/eval_protocol/mcp/execution/manager.py b/eval_protocol/mcp/execution/manager.py
@@ -17,9 +17,6 @@
 import anyio
 from openai.types import CompletionUsage
 
-from vendor.tau2.data_model.message import AssistantMessage, UserMessage
-from vendor.tau2.user.user_simulator import UserSimulator
-
 from ...models import EvaluationRow, InputMetadata, Message, Status
 from ...types import TerminationReason, Trajectory, NonSkippableException
 
@@ -234,6 +231,10 @@ def extract_text_content(msg_dict):
 
             # If user simulation is enabled, initial message is from the simulated user
             if dataset_row.user_simulation and dataset_row.user_simulation.get("enabled", False):
+                # Lazy import vendor.tau2 - only load when user simulation is actually used
+                from vendor.tau2.data_model.message import AssistantMessage, UserMessage
+                from vendor.tau2.user.user_simulator import UserSimulator
+
                 user_simulator = UserSimulator(
                     instructions=dataset_row.user_simulation.get("system_prompt"),
                     llm=dataset_row.user_simulation.get("llm", "gpt-4.1"),
@@ -598,6 +599,9 @@ def _get_user_simulator_messages(self, conversation_history: List[Dict[str, Any]
         """
         Filter conversation history for user simulator and convert to tau2-bench format.
         """
+        # Lazy import vendor.tau2 types
+        from vendor.tau2.data_model.message import AssistantMessage, UserMessage
+
         tau2_messages = []
 
         for message in conversation_history:
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,32 +27,25 @@ dependencies = [
     "aiohttp",
     "mcp>=1.9.2",
     "PyYAML>=5.0",
-    # Pin minimum datasets to avoid pyarrow API mismatch (PyExtensionType removal in pyarrow>=21)
-    "datasets>=3.0.0",
-    "fsspec",
     "hydra-core>=1.3.2",
     "omegaconf>=2.3.0",
-    "gymnasium>=0.29.0",
     "httpx>=0.24.0",
     "anthropic>=0.59.0",
-    "ipykernel>=6.30.0",
-    "jupyter>=1.1.1",
+    "litellm<1.75.0",
+    "pytest>=6.0.0",
+    "pytest-asyncio>=0.21.0",
+    "peewee>=3.18.2",
+    "backoff>=2.2.0",
     # Dependencies for vendored tau2 package
     "toml>=0.10.0",
     "loguru>=0.6.0",
     "docstring-parser>=0.15",
     "rich>=12.0.0",
     "psutil>=5.8.0",
-    "litellm<1.75.0",
     "addict>=2.4.0",
     "deepdiff>=6.0.0",
-    "pandas>=1.5.0",
     "websockets>=15.0.1",
     "fastapi>=0.116.1",
-    "pytest>=6.0.0",
-    "pytest-asyncio>=0.21.0",
-    "peewee>=3.18.2",
-    "backoff>=2.2.0",
 ]
 
 [project.urls]
@@ -66,6 +59,7 @@ dev = [
     "werkzeug>=2.0.0",
     "ruff>=0.5.0",
     "transformers>=4.0.0",
+    "pandas>=1.5.0",
     "types-setuptools",
     "types-requests",
     "types-PyYAML",
@@ -109,12 +103,6 @@ huggingface = [
     "datasets>=3.0.0",
     "transformers>=4.0.0",
 ]
-adapters = [
-    "langfuse>=2.0.0",
-    # Keep in sync with core dependency to ensure compatibility with latest pyarrow
-    "datasets>=3.0.0",
-    "transformers>=4.0.0",
-]
 langsmith = [
     "langsmith>=0.1.86",
 ]
diff --git a/uv.lock b/uv.lock