Skip to content

Commit b6af88f

Browse files
committed
fixed testing of marble
1 parent de674ce commit b6af88f

3 files changed

Lines changed: 60 additions & 15 deletions

File tree

tests/test_benchmarks/test_multiagentbench/conftest.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,11 @@
1515

1616
import pytest
1717
from typing import Any, Dict, List, Optional, Sequence, Tuple
18-
from unittest.mock import MagicMock
18+
from unittest.mock import MagicMock, patch
1919

2020
from conftest import DummyModelAdapter
2121
from maseval import AgentAdapter, Task, MessageHistory
22+
from maseval.benchmark.multiagentbench.environment import MultiAgentBenchEnvironment
2223

2324

2425
# =============================================================================
@@ -44,6 +45,33 @@ def ensure_marble_data():
4445
return ensure_marble_exists(auto_download=True)
4546

4647

48+
@pytest.fixture(autouse=True)
49+
def _mock_marble_environment():
50+
"""Mock MARBLE environment creation for all Tier 1 (offline) tests.
51+
52+
MultiAgentBenchEnvironment.setup_state() calls _create_marble_environment()
53+
which imports marble — a vendored dependency not available in CI. This fixture
54+
replaces that method with a MagicMock so structural tests can exercise the full
55+
benchmark pipeline without marble installed.
56+
57+
Tests that need the real _create_marble_environment can override this fixture
58+
with a no-op at the class or file level (standard pytest fixture scoping).
59+
"""
60+
mock_env = MagicMock()
61+
mock_env.is_done.return_value = False
62+
mock_env.is_task_completed.return_value = False
63+
mock_env.get_state.return_value = {}
64+
mock_env._action_handlers = {}
65+
mock_env.action_handler_descriptions = {}
66+
67+
with patch.object(
68+
MultiAgentBenchEnvironment,
69+
"_create_marble_environment",
70+
return_value=mock_env,
71+
):
72+
yield
73+
74+
4775
# =============================================================================
4876
# Sample Task Data
4977
# =============================================================================

tests/test_benchmarks/test_multiagentbench/test_environment.py

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -45,19 +45,6 @@ def test_init_with_bargaining_task(self, sample_bargaining_task_data: Dict[str,
4545

4646
assert env.domain == "bargaining"
4747

48-
def test_init_raises_without_marble(self, sample_research_task_data: Dict[str, Any]):
49-
"""Constructor should raise ImportError when MARBLE is not available."""
50-
marble_modules = {k: v for k, v in sys.modules.items() if "marble" in k}
51-
for module_name in marble_modules:
52-
sys.modules.pop(module_name, None)
53-
54-
try:
55-
with patch.dict("sys.modules", {"marble.environments.base_env": None}):
56-
with pytest.raises(ImportError, match="MARBLE is not available"):
57-
MultiAgentBenchEnvironment(task_data=sample_research_task_data)
58-
finally:
59-
sys.modules.update(marble_modules)
60-
6148
def test_setup_state_extracts_domain(self, sample_research_task_data: Dict[str, Any]):
6249
"""setup_state should extract domain from task data."""
6350
env = MultiAgentBenchEnvironment(task_data=sample_research_task_data)
@@ -102,8 +89,31 @@ def test_gather_config_includes_domain(self, sample_research_task_data: Dict[str
10289
assert config["domain"] == "research"
10390
assert "tool_descriptions" in config
10491

92+
93+
class TestMultiAgentBenchEnvironmentRealMarble:
94+
"""Tests that need the real _create_marble_environment (no mock)."""
95+
96+
@pytest.fixture(autouse=True)
97+
def _mock_marble_environment(self):
98+
"""Override: let these tests use real marble imports."""
99+
yield
100+
101+
def test_init_raises_without_marble(self, sample_research_task_data: Dict[str, Any]):
102+
"""Constructor should raise ImportError when MARBLE is not available."""
103+
marble_modules = {k: v for k, v in sys.modules.items() if "marble" in k}
104+
for module_name in marble_modules:
105+
sys.modules.pop(module_name, None)
106+
107+
try:
108+
with patch.dict("sys.modules", {"marble.environments.base_env": None}):
109+
with pytest.raises(ImportError, match="MARBLE is not available"):
110+
MultiAgentBenchEnvironment(task_data=sample_research_task_data)
111+
finally:
112+
sys.modules.update(marble_modules)
113+
114+
@pytest.mark.live
105115
def test_marble_env_type_in_state(self, sample_research_task_data: Dict[str, Any]):
106-
"""setup_state should include MARBLE env type."""
116+
"""setup_state should include MARBLE env type (needs real marble)."""
107117
env = MultiAgentBenchEnvironment(task_data=sample_research_task_data)
108118

109119
assert env.state["marble_env_type"] == "ResearchEnvironment"

tests/test_benchmarks/test_multiagentbench/test_integration_real_data.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,13 @@
2626

2727
pytestmark = [pytest.mark.live, pytest.mark.slow, pytest.mark.benchmark]
2828

29+
30+
@pytest.fixture(autouse=True)
31+
def _mock_marble_environment():
32+
"""Override: integration tests use real marble."""
33+
yield
34+
35+
2936
# Domains that can be tested without external infrastructure (Docker, Minecraft Server)
3037
NON_INFRA_DOMAINS = sorted(VALID_DOMAINS - INFRASTRUCTURE_DOMAINS - {"minecraft", "werewolf"})
3138

0 commit comments

Comments
 (0)