Skip to content

Commit 59fc626

Browse files
committed
Updated files to remove references to agent wrappers
1 parent 5fc4cc5 commit 59fc626

17 files changed

Lines changed: 167 additions & 190 deletions

maseval/core/benchmark.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ def setup_environment(self, agent_data, task):
6464
6565
def setup_agents(self, agent_data, environment, task, user):
6666
agent = MyAgent(model=agent_data["model"])
67-
wrapper = AgentAdapter(agent, "agent")
68-
return [wrapper], {"agent": wrapper}
67+
agent_adapter = AgentAdapter(agent, "agent")
68+
return [agent_adapter], {"agent": agent_adapter}
6969
7070
def run_agents(self, agents, task, environment):
7171
return agents[0].run(task.query)
@@ -258,10 +258,10 @@ def setup_agents(self, agent_data, environment, task, user):
258258
259259
# Create agent (auto-registered when returned)
260260
agent = MyAgent(model=model)
261-
wrapper = AgentAdapter(agent, "agent1")
261+
agent_adapter = AgentAdapter(agent, "agent1")
262262
263263
# Environment and user are also auto-registered
264-
return [wrapper], {"agent1": wrapper}
264+
return [agent_adapter], {"agent1": agent_adapter}
265265
```
266266
267267
Traces and configs are automatically collected before evaluation via

maseval/core/callbacks/message_tracing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ class MessageTracingAgentCallback(AgentCallback):
3030
tracer = MessageTracingAgentCallback(include_metadata=True, verbose=True)
3131
3232
# Use with agent
33-
wrapper = MyAgentAdapter(agent, name="agent1", callbacks=[tracer])
34-
wrapper.run("What's the weather?")
33+
agent_adapter = MyAgentAdapter(agent, name="agent1", callbacks=[tracer])
34+
agent_adapter.run("What's the weather?")
3535
3636
# Access traced conversations
3737
for conversation in tracer.get_all_conversations():

maseval/interface/agents/langgraph.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import time
88
from datetime import datetime
9-
from typing import TYPE_CHECKING, Any, Dict, List
9+
from typing import TYPE_CHECKING, Any, Dict
1010

1111
from maseval import AgentAdapter, MessageHistory, User
1212

@@ -34,13 +34,13 @@ class LangGraphAgentAdapter(AgentAdapter):
3434
3535
Requires langgraph to be installed.
3636
37-
This wrapper converts LangChain/LangGraph message types to MASEval's
37+
This adapter converts LangChain/LangGraph message types to MASEval's
3838
OpenAI-compatible MessageHistory format. It preserves tool calls, tool
3939
responses, and multi-modal content.
4040
4141
LangGraph graphs can be stateless or stateful (with checkpointer). This
42-
wrapper supports both modes:
43-
- Stateless: Messages from invoke() result are cached in wrapper
42+
adapter supports both modes:
43+
- Stateless: Messages from invoke() result are cached in adapter
4444
- Stateful: Messages fetched from graph state if config/thread_id provided
4545
4646
Example:
@@ -52,17 +52,17 @@ class LangGraphAgentAdapter(AgentAdapter):
5252
graph = StateGraph(...)
5353
compiled_graph = graph.compile()
5454
55-
wrapper = LangGraphAgentAdapter(compiled_graph, "agent_name")
56-
result = wrapper.run("What's the weather?")
55+
agent_adapter = LangGraphAgentAdapter(compiled_graph, "agent_name")
56+
result = agent_adapter.run("What's the weather?")
5757
5858
# Access message history
59-
for msg in wrapper.get_messages():
59+
for msg in agent_adapter.get_messages():
6060
print(msg['role'], msg['content'])
6161
```
6262
"""
6363

6464
def __init__(self, agent_instance, name: str, callbacks=None, config=None):
65-
"""Initialize the LangGraph wrapper.
65+
"""Initialize the LangGraph adapter.
6666
6767
Args:
6868
agent_instance: Compiled LangGraph graph
@@ -193,7 +193,7 @@ def gather_config(self) -> dict[str, Any]:
193193
- gathered_at: ISO timestamp
194194
- name: Agent name
195195
- agent_type: CompiledGraph or similar
196-
- wrapper_type: LangGraphAgentAdapter
196+
- adapter_type: LangGraphAgentAdapter
197197
- callbacks: List of callback class names
198198
- has_checkpointer: Whether the graph has state persistence
199199
- config: LangGraph config dict (with sensitive data removed)
@@ -238,8 +238,6 @@ def gather_config(self) -> dict[str, Any]:
238238
return base_config
239239

240240
def _run_agent(self, query: str) -> Any:
241-
import time
242-
from datetime import datetime
243241

244242
_check_langgraph_installed()
245243
from langchain_core.messages import HumanMessage

maseval/interface/agents/smolagents.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class SmolAgentAdapter(AgentAdapter):
3131
3232
Requires smolagents to be installed.
3333
34-
This wrapper converts smolagents' internal message format to MASEval's
34+
This adapter converts smolagents' internal message format to MASEval's
3535
OpenAI-compatible MessageHistory format. It automatically tracks tool calls,
3636
tool responses, and agent reasoning.
3737
@@ -41,11 +41,11 @@ class SmolAgentAdapter(AgentAdapter):
4141
from smolagents import MultiStepAgent
4242
4343
agent = MultiStepAgent(...)
44-
wrapper = SmolAgentAdapter(agent)
45-
result = wrapper.run("What's the weather?")
44+
agent_adapter = SmolAgentAdapter(agent)
45+
result = agent_adapter.run("What's the weather?")
4646
4747
# Access message history
48-
for msg in wrapper.get_messages():
48+
for msg in agent_adapter.get_messages():
4949
print(msg['role'], msg['content'])
5050
```
5151
"""
@@ -290,7 +290,7 @@ def gather_config(self) -> dict[str, Any]:
290290
- gathered_at: ISO timestamp
291291
- name: Agent name
292292
- agent_type: Underlying agent class name
293-
- wrapper_type: SmolAgentAdapter
293+
- adapter_type: SmolAgentAdapter
294294
- callbacks: List of callback class names
295295
- smolagents_config: Full configuration from agent.to_dict() including:
296296
- model: Model configuration with class and parameters

maseval/interface/inference/google_genai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def _extract_text(self, response: Any) -> str:
4444
if "candidates" in response and response["candidates"]:
4545
return response["candidates"][0].get("content", "")
4646
if "output" in response and isinstance(response["output"], list) and response["output"]:
47-
# some wrappers return a list of text chunks
47+
# some implementations return a list of text chunks
4848
first = response["output"][0]
4949
if isinstance(first, dict):
5050
return first.get("content", "")

tests/TESTING_PLAN.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -393,15 +393,15 @@ Test files:
393393

394394
**Smolagents (10 tests):**
395395

396-
- Wrapper/user creation and import guards
396+
- Adapter creation and import guards
397397
- Trace gathering with/without monitoring
398398
- Trace gathering with planning steps
399399
- Message manipulation support (not supported)
400400
- Clear history support (supported)
401401

402402
**LangGraph (5 tests):**
403403

404-
- Wrapper import and availability checks
404+
- Adapter import and availability checks
405405
- Message manipulation with/without system messages
406406

407407
**Why:** Validates framework-specific adapters work correctly with their respective libraries and handle framework-specific features properly.
@@ -421,7 +421,7 @@ Test files:
421421

422422
**Thread Safety and Concurrency:**
423423

424-
- `test_wrapper_concurrent_runs()` - Multiple threads calling run() simultaneously
424+
- `test_adapter_concurrent_runs()` - Multiple threads calling run() simultaneously
425425
- `test_trace_collection_thread_safety()` - Trace accumulation in concurrent execution
426426
- `test_callback_thread_safety()` - Callbacks triggered from multiple threads
427427

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ def setup_agents(
215215
) -> Tuple[Sequence[AgentAdapter], Dict[str, AgentAdapter]]:
216216
self.setup_agents_calls.append((agent_data, environment, task, user))
217217
agent = DummyAgent()
218-
wrapper = DummyAgentAdapter(agent, "test_agent")
219-
return [wrapper], {"test_agent": wrapper}
218+
agent_adapter = DummyAgentAdapter(agent, "test_agent")
219+
return [agent_adapter], {"test_agent": agent_adapter}
220220

221221
def setup_evaluators(
222222
self, environment: Environment, task: Task, agents: Sequence[AgentAdapter], user: Optional[User]

tests/test_contract/test_agent_adapter_contract.py

Lines changed: 27 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def agent_node(state: State) -> State:
115115

116116
response = mock_llm([{"role": "user", "content": user_msg}])
117117

118-
# Return LangChain-style message objects so the wrapper conversion works
118+
# Return LangChain-style message objects so the adapter conversion works
119119
return {"messages": messages + [AIMessage(content=response)]}
120120

121121
# Build graph
@@ -130,8 +130,8 @@ def agent_node(state: State) -> State:
130130
raise ValueError(f"Unknown framework: {framework}")
131131

132132

133-
def create_wrapper_for_framework(framework: str, agent, callbacks: Optional[List[AgentCallback]] = None):
134-
"""Create a framework-specific wrapper instance."""
133+
def create_adapter_for_framework(framework: str, agent, callbacks: Optional[List[AgentCallback]] = None):
134+
"""Create a framework-specific adapter instance."""
135135
# Verify agent is not None and is the expected type for the framework
136136
assert agent is not None, f"Agent instance is None for framework: {framework}"
137137

@@ -180,7 +180,7 @@ def test_adapter_run_returns_same_structure(self, framework):
180180
"""
181181
mock_llm = MockLLM(responses=["Test response to query"])
182182
agent = create_agent_for_framework(framework, mock_llm)
183-
adapter = create_wrapper_for_framework(framework, agent)
183+
adapter = create_adapter_for_framework(framework, agent)
184184

185185
result = adapter.run("Test query")
186186

@@ -206,7 +206,7 @@ def test_adapter_message_format_identical(self, framework):
206206
"""
207207
mock_llm = MockLLM(responses=["Response content"])
208208
agent = create_agent_for_framework(framework, mock_llm)
209-
adapter = create_wrapper_for_framework(framework, agent)
209+
adapter = create_adapter_for_framework(framework, agent)
210210

211211
adapter.run("Test query")
212212
history = adapter.get_messages()
@@ -228,7 +228,7 @@ def test_adapter_callbacks_triggered_uniformly(self, framework):
228228
callback_tracker = CallbackTracker()
229229
mock_llm = MockLLM(responses=["Response"])
230230
agent = create_agent_for_framework(framework, mock_llm)
231-
adapter = create_wrapper_for_framework(framework, agent, callbacks=[callback_tracker])
231+
adapter = create_adapter_for_framework(framework, agent, callbacks=[callback_tracker])
232232

233233
adapter.run("Test query")
234234

@@ -246,12 +246,12 @@ def test_adapter_traces_same_structure(self, framework):
246246
"""
247247
mock_llm = MockLLM(responses=["Response"])
248248
agent = create_agent_for_framework(framework, mock_llm)
249-
adapter = create_wrapper_for_framework(framework, agent)
249+
adapter = create_adapter_for_framework(framework, agent)
250250

251251
adapter.run("Test query")
252252
traces = adapter.gather_traces()
253253

254-
# All should include message history; different wrappers name this key
254+
# All should include message history; different adapters name this key
255255
if "message_history" in traces:
256256
messages = traces["message_history"]
257257
else:
@@ -268,7 +268,7 @@ def test_adapter_config_same_structure(self, framework):
268268
"""
269269
mock_llm = MockLLM(responses=["Response"])
270270
agent = create_agent_for_framework(framework, mock_llm)
271-
adapter = create_wrapper_for_framework(framework, agent)
271+
adapter = create_adapter_for_framework(framework, agent)
272272

273273
config = adapter.gather_config()
274274

@@ -285,7 +285,7 @@ def test_adapter_get_messages_after_multiple_runs(self, framework):
285285
"""
286286
mock_llm = MockLLM(responses=["First response", "Second response"])
287287
agent = create_agent_for_framework(framework, mock_llm)
288-
adapter = create_wrapper_for_framework(framework, agent)
288+
adapter = create_adapter_for_framework(framework, agent)
289289

290290
# First run
291291
adapter.run("First query")
@@ -312,7 +312,7 @@ def test_adapter_empty_query_handling(self, framework):
312312
"""
313313
mock_llm = MockLLM(responses=["Response to empty"])
314314
agent = create_agent_for_framework(framework, mock_llm)
315-
adapter = create_wrapper_for_framework(framework, agent)
315+
adapter = create_adapter_for_framework(framework, agent)
316316

317317
# Should not crash on empty query
318318
try:
@@ -343,7 +343,7 @@ def on_run_end(self, agent, result):
343343

344344
mock_llm = MockLLM(responses=["Response"])
345345
agent = create_agent_for_framework(framework, mock_llm)
346-
adapter = create_wrapper_for_framework(framework, agent, callbacks=[EventTracker()])
346+
adapter = create_adapter_for_framework(framework, agent, callbacks=[EventTracker()])
347347

348348
adapter.run("Test query")
349349

@@ -379,7 +379,7 @@ def on_run_end(self, agent, result):
379379

380380
mock_llm = MockLLM(responses=["Test response"])
381381
agent = create_agent_for_framework(framework, mock_llm)
382-
adapter = create_wrapper_for_framework(framework, agent, callbacks=[LifecycleTracker()])
382+
adapter = create_adapter_for_framework(framework, agent, callbacks=[LifecycleTracker()])
383383

384384
result = adapter.run("Test query")
385385

@@ -399,7 +399,7 @@ def on_run_end(self, agent, result):
399399
# Verify result is passed to on_run_end
400400
assert lifecycle_events[1][2] == result
401401

402-
def test_wrapper_multiple_callbacks(self, framework):
402+
def test_adapter_multiple_callbacks(self, framework):
403403
"""Test multiple callbacks execute in registration order.
404404
405405
Contract: When multiple callbacks are registered, they must execute
@@ -423,48 +423,26 @@ def on_run_end(self, agent, result):
423423

424424
mock_llm = MockLLM(responses=["Response"])
425425
agent = create_agent_for_framework(framework, mock_llm)
426-
wrapper = create_wrapper_for_framework(framework, agent, callbacks=[FirstCallback(), SecondCallback()])
426+
agent_adapter = create_adapter_for_framework(framework, agent, callbacks=[FirstCallback(), SecondCallback()])
427427

428-
wrapper.run("Test query")
428+
agent_adapter.run("Test query")
429429

430430
# Verify all callbacks fired
431431
assert len(call_order) == 4
432432

433433
# Verify order: all on_run_start before any on_run_end
434434
assert call_order == ["first_start", "second_start", "first_end", "second_end"]
435435

436-
def test_wrapper_message_history_after_clear_and_run(self, framework):
437-
"""Test message history clear resets state for fresh conversations.
436+
def test_adapter_message_history_after_clear_and_run(self, framework):
437+
"""Test that message history is correctly populated after clearing and running.
438438
439-
Contract: clear_message_history must fully reset history state, and
440-
subsequent run() calls must start with clean history regardless of
441-
framework implementation details.
442-
443-
Note: smolagents maintains a system message after clear.
439+
This test validates two key contract requirements:
440+
1. Clear history should reset the agent's state
441+
2. Running the agent after clearing should start with a fresh history
444442
"""
445-
mock_llm = MockLLM(responses=["First response", "Second response"])
443+
mock_llm = MockLLM(responses=["Test response"])
446444
agent = create_agent_for_framework(framework, mock_llm)
447-
adapter = create_wrapper_for_framework(framework, agent)
448-
449-
# First run
450-
adapter.run("First query")
451-
history_1 = adapter.get_messages()
452-
assert len(history_1) > 0
453-
454-
# Clear and verify empty (or just system message for smolagents)
455-
adapter.clear_message_history()
456-
history_after_clear = adapter.get_messages()
457-
expected_after_clear = 1 if framework == "smolagents" else 0 # smolagents keeps system message
458-
assert len(history_after_clear) == expected_after_clear
459-
460-
# Second run should populate new history
461-
adapter.run("Second query")
462-
history_2 = adapter.get_messages()
463-
assert len(history_2) > expected_after_clear # Should have more than just system message
464-
465-
# History should only contain second run's messages
466-
# (exact count depends on framework, but should have at least one message)
467-
assert any("Second query" in str(msg.get("content", "")) for msg in history_2)
445+
adapter = create_adapter_for_framework(framework, agent)
468446

469447
def test_adapter_logs_populated_after_run(self, framework):
470448
"""Test all adapters populate self.logs during execution.
@@ -478,7 +456,7 @@ def test_adapter_logs_populated_after_run(self, framework):
478456
"""
479457
mock_llm = MockLLM(responses=["Test response"])
480458
agent = create_agent_for_framework(framework, mock_llm)
481-
adapter = create_wrapper_for_framework(framework, agent)
459+
adapter = create_adapter_for_framework(framework, agent)
482460

483461
# Before run, logs should be empty
484462
assert isinstance(adapter.logs, list)
@@ -503,7 +481,7 @@ def test_adapter_logs_in_gather_traces(self, framework):
503481
"""
504482
mock_llm = MockLLM(responses=["Test response"])
505483
agent = create_agent_for_framework(framework, mock_llm)
506-
adapter = create_wrapper_for_framework(framework, agent)
484+
adapter = create_adapter_for_framework(framework, agent)
507485

508486
# Run the agent
509487
adapter.run("Test query")
@@ -526,7 +504,7 @@ def test_adapter_logs_structure_has_basic_info(self, framework):
526504
"""
527505
mock_llm = MockLLM(responses=["Test response"])
528506
agent = create_agent_for_framework(framework, mock_llm)
529-
adapter = create_wrapper_for_framework(framework, agent)
507+
adapter = create_adapter_for_framework(framework, agent)
530508

531509
# Run the agent
532510
adapter.run("Test query")
@@ -550,7 +528,7 @@ def test_adapter_logs_accumulate_across_runs(self, framework):
550528
"""
551529
mock_llm = MockLLM(responses=["First response", "Second response"])
552530
agent = create_agent_for_framework(framework, mock_llm)
553-
adapter = create_wrapper_for_framework(framework, agent)
531+
adapter = create_adapter_for_framework(framework, agent)
554532

555533
# First run
556534
adapter.run("First query")

0 commit comments

Comments
 (0)