Skip to content

Commit fdc6a51

Browse files
committed
removed non-contractual functions from AgentAdapter
1 parent 1ea274d commit fdc6a51

12 files changed

Lines changed: 29 additions & 423 deletions

File tree

AGENTS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,7 @@ For lists and dictionaries, use `Dict[...,...]`, `List[...]`, `Sequence[...]` et
236236
- DO NOT publicly distribute code or data
237237
- DO NOT publish without explicit permission
238238
- DO NOT share copyrighted third-party benchmark data
239+
240+
## Changelog
241+
242+
When the task is completed, add your changes to the Changelog.

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1919

2020
### Removed
2121

22+
- Removed `set_message_history`, `append_to_message_history` and `clear_message_history` from `AgentAdapter` and its subclasses. (PR: #3)
23+
2224
## [0.1.2] - 2025-11-18
2325

2426
### Added

docs/guides/message-tracing.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,21 @@ for msg in messages:
4545
print(f" Tools called: {[tc['function']['name'] for tc in msg['tool_calls']]}")
4646
```
4747

48-
### Clearing History Between Tasks
48+
### Fresh Conversations for Multiple Tasks
4949

50-
In benchmarks, you typically want to clear history before each new task:
50+
In benchmarks, you typically want a fresh adapter instance for each task:
5151

5252
```python
5353
# In your benchmark loop
5454
for task in benchmark.tasks:
55-
agent_adapter.clear_message_history() # Reset for new task
55+
# Create a new adapter instance for each task
56+
agent_adapter = YourAgentAdapter(agent_instance=agent, name="task_agent")
5657
result = agent_adapter.run(task.query)
5758
evaluate(result, task.ground_truth)
5859
```
5960

61+
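This ensures each task starts with a clean slate and avoids conversation history contamination. If the underlying agent keeps its own state (for example, smolagents stores its memory on the agent object), create a new agent for each task as well.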
62+
6063
## Using the Tracing Callback
6164

6265
For multi-agent systems or when you need to collect conversations from many runs, use `MessageTracingAgentCallback`:
@@ -190,7 +193,7 @@ Messages use OpenAI's chat completion format:
190193
}
191194
```
192195

193-
## Custom agent adapters
196+
## Custom Agent Adapters
194197

195198
If you're implementing a custom adapter, the framework handles message storage automatically via `get_messages()`. Just ensure your `_run_agent()` method returns a `MessageHistory`:
196199

@@ -211,13 +214,13 @@ class MyAgentAdapter(AgentAdapter):
211214
return history
212215
```
213216

214-
See the [agent adapter guide](../reference/agent.md) for details on implementing custom adapters.
217+
See the [AgentAdapter guide](../reference/agent.md) for details on implementing custom adapters.
215218

216219
## Tips
217220

218221
**For debugging**: Use `verbose=True` to see traces in real-time.
219222

220-
**For benchmarks**: Clear history between tasks with `agent_adapter.clear_message_history()`.
223+
**For benchmarks**: Create a new adapter instance for each task to ensure clean conversation history.
221224

222225
**For multi-agent systems**: Use a shared tracer and `get_conversations_by_agent()` to analyze each agent separately.
223226

maseval/core/agent.py

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from abc import ABC, abstractmethod
2-
from typing import List, Any, Optional, Union, Dict
2+
from typing import List, Any, Optional, Dict
33

44
from .callback import AgentCallback
5-
from .history import MessageHistory, RoleType
5+
from .history import MessageHistory
66
from .tracing import TraceableMixin
77
from .config import ConfigurableMixin
88

@@ -101,35 +101,6 @@ def get_messages(self) -> MessageHistory:
101101
"""
102102
return self.messages if self.messages is not None else MessageHistory()
103103

104-
def set_message_history(self, history: MessageHistory) -> None:
105-
"""Set the message history.
106-
107-
This is typically called by _run_agent() implementations after executing
108-
the agent, but can also be used to inject or modify history.
109-
110-
Args:
111-
history: The MessageHistory to set
112-
"""
113-
self.messages = history
114-
115-
def clear_message_history(self) -> None:
116-
"""Clear the message history."""
117-
self.messages = None
118-
119-
def append_to_message_history(self, role: Union[RoleType, str], content: Union[str, List[Any]], **kwargs) -> None:
120-
"""Append a message to the history.
121-
122-
If no history exists, creates a new one.
123-
124-
Args:
125-
role: The message role ("user", "assistant", "system", "tool")
126-
content: The message content (string or list of content parts)
127-
**kwargs: Additional fields (name, metadata, timestamp, etc.)
128-
"""
129-
if self.messages is None:
130-
self.messages = MessageHistory()
131-
self.messages.add_message(role, content, **kwargs) # type: ignore
132-
133104
def gather_traces(self) -> dict[str, Any]:
134105
"""Gather execution traces from this agent.
135106

maseval/interface/agents/langgraph.py

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -105,85 +105,6 @@ def get_messages(self) -> MessageHistory:
105105
# No messages available
106106
return MessageHistory()
107107

108-
def set_message_history(self, history: MessageHistory) -> None:
109-
"""Set message history for langgraph.
110-
111-
For stateless graphs, updates the cached result.
112-
For stateful graphs, this is not fully supported as LangGraph manages state internally.
113-
114-
Args:
115-
history: MASEval MessageHistory to set
116-
"""
117-
_check_langgraph_installed()
118-
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
119-
120-
# Convert MessageHistory to LangChain messages
121-
lc_messages = []
122-
for msg in history:
123-
role = msg.get("role", "assistant")
124-
content = msg.get("content", "")
125-
126-
if role == "user":
127-
lc_messages.append(HumanMessage(content=content))
128-
elif role == "assistant":
129-
lc_messages.append(AIMessage(content=content))
130-
elif role == "system":
131-
lc_messages.append(SystemMessage(content=content))
132-
elif role == "tool":
133-
tool_call_id = msg.get("tool_call_id", "")
134-
lc_messages.append(ToolMessage(content=content, tool_call_id=tool_call_id))
135-
136-
# Update cached result
137-
self._last_result = {"messages": lc_messages}
138-
139-
# Also update base class cache
140-
super().set_message_history(history)
141-
142-
def clear_message_history(self) -> None:
143-
"""Clear message history for langgraph.
144-
145-
Clears the cached result. For stateful graphs, this doesn't clear
146-
the persistent state in the checkpointer.
147-
"""
148-
self._last_result = None
149-
super().clear_message_history()
150-
151-
def append_to_message_history(self, role: str, content: Any, **kwargs) -> None:
152-
"""Append message to history.
153-
154-
For stateless graphs, this appends to the cached result.
155-
For stateful graphs, messages are managed by LangGraph during invoke().
156-
157-
Args:
158-
role: Message role
159-
content: Message content (string or list)
160-
**kwargs: Additional message fields
161-
"""
162-
_check_langgraph_installed()
163-
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
164-
165-
# Get current messages
166-
current_messages = []
167-
if self._last_result and "messages" in self._last_result:
168-
current_messages = self._last_result["messages"]
169-
170-
# Create new message
171-
if role == "user":
172-
new_msg = HumanMessage(content=str(content))
173-
elif role == "assistant":
174-
new_msg = AIMessage(content=str(content))
175-
elif role == "system":
176-
new_msg = SystemMessage(content=str(content))
177-
else:
178-
new_msg = AIMessage(content=str(content))
179-
180-
# Append and update cache
181-
current_messages.append(new_msg)
182-
self._last_result = {"messages": current_messages}
183-
184-
# Also update base class cache
185-
super().append_to_message_history(role, content, **kwargs)
186-
187108
def gather_config(self) -> dict[str, Any]:
188109
"""Gather configuration from this LangGraph agent.
189110

maseval/interface/agents/smolagents.py

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -351,54 +351,6 @@ def get_messages(self) -> MessageHistory:
351351
# Convert and return
352352
return self._convert_smolagents_messages(smol_messages)
353353

354-
def set_message_history(self, history: MessageHistory) -> None:
355-
"""Set message history - NOT SUPPORTED by smolagents.
356-
357-
Args:
358-
history: MASEval MessageHistory to set
359-
360-
Raises:
361-
NotImplementedError: smolagents doesn't support arbitrary message injection
362-
"""
363-
raise NotImplementedError(
364-
"smolagents doesn't support setting arbitrary message history. "
365-
"The agent's memory is built from execution steps and cannot be directly manipulated. "
366-
"Use clear_message_history() to reset, then run() to generate new conversation."
367-
)
368-
369-
def clear_message_history(self) -> None:
370-
"""Clear message history by resetting smolagents memory."""
371-
_check_smolagents_installed()
372-
from smolagents.memory import AgentMemory
373-
374-
# Get system prompt before clearing
375-
system_prompt = ""
376-
if hasattr(self.agent, "memory") and hasattr(self.agent.memory, "system_prompt"):
377-
system_prompt = self.agent.memory.system_prompt
378-
379-
# Reset memory
380-
self.agent.memory = AgentMemory(system_prompt=system_prompt)
381-
382-
# Also clear base class cache
383-
super().clear_message_history()
384-
385-
def append_to_message_history(self, role: str, content: Any, **kwargs) -> None:
386-
"""Append message to history - NOT SUPPORTED by smolagents.
387-
388-
Args:
389-
role: Message role
390-
content: Message content (string or list)
391-
**kwargs: Additional message fields
392-
393-
Raises:
394-
NotImplementedError: smolagents doesn't support arbitrary message injection
395-
"""
396-
raise NotImplementedError(
397-
"smolagents doesn't support appending arbitrary messages to history. "
398-
"The agent's memory is built from execution steps and cannot be directly manipulated. "
399-
"Use run() to generate conversation messages."
400-
)
401-
402354
def _run_agent(self, query: str) -> str:
403355
_check_smolagents_installed()
404356

tests/conftest.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,19 +97,17 @@ class DummyAgentAdapter(AgentAdapter):
9797
def _run_agent(self, query: str) -> str:
9898
import time
9999

100-
# Create message history
101-
history = MessageHistory()
102-
history.add_message(role="user", content=query)
103-
104100
# Track timing
105101
start_time = time.time()
106102

107103
# Run underlying agent
108104
response = self.agent.run(query)
109-
history.add_message(role="assistant", content=response)
110105

111-
# Store history
112-
self.set_message_history(history)
106+
# Store history directly
107+
if self.messages is None:
108+
self.messages = MessageHistory()
109+
self.messages.add_message(role="user", content=query)
110+
self.messages.add_message(role="assistant", content=response)
113111

114112
# Populate logs to fulfill contract
115113
duration = time.time() - start_time

tests/test_contract/test_agent_adapter_contract.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -433,37 +433,6 @@ def on_run_end(self, agent, result):
433433
# Verify order: all on_run_start before any on_run_end
434434
assert call_order == ["first_start", "second_start", "first_end", "second_end"]
435435

436-
def test_adapter_message_history_after_clear_and_run(self, framework):
437-
"""Test that message history is correctly populated after clearing and running.
438-
439-
This test validates two key contract requirements:
440-
1. Clear history should reset the agent's state
441-
2. Running the agent after clearing should start with a fresh history
442-
"""
443-
mock_llm = MockLLM(responses=["Test response"])
444-
agent = create_agent_for_framework(framework, mock_llm)
445-
adapter = create_adapter_for_framework(framework, agent)
446-
447-
# First run
448-
adapter.run("First query")
449-
history_1 = adapter.get_messages()
450-
assert len(history_1) > 0
451-
452-
# Clear and verify empty (or just system message for smolagents)
453-
adapter.clear_message_history()
454-
history_after_clear = adapter.get_messages()
455-
expected_after_clear = 1 if framework == "smolagents" else 0 # smolagents keeps system message
456-
assert len(history_after_clear) == expected_after_clear
457-
458-
# Second run should populate new history
459-
adapter.run("Second query")
460-
history_2 = adapter.get_messages()
461-
assert len(history_2) > expected_after_clear # Should have more than just system message
462-
463-
# History should only contain second run's messages
464-
# (exact count depends on framework, but should have at least one message)
465-
assert any("Second query" in str(msg.get("content", "")) for msg in history_2)
466-
467436
def test_adapter_logs_populated_after_run(self, framework):
468437
"""Test all adapters populate self.logs during execution.
469438
Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,37 +49,6 @@ def test_agent_adapter_get_messages_returns_history(self, dummy_agent_adapter):
4949
assert history[0]["role"] == "user"
5050
assert history[1]["role"] == "assistant"
5151

52-
def test_agent_adapter_set_message_history(self, dummy_agent_adapter):
53-
"""Test that message history can be set manually."""
54-
new_history = MessageHistory()
55-
new_history.add_message("user", "Custom message")
56-
new_history.add_message("assistant", "Custom response")
57-
58-
dummy_agent_adapter.set_message_history(new_history)
59-
60-
retrieved = dummy_agent_adapter.get_messages()
61-
assert len(retrieved) == 2
62-
assert retrieved[0]["content"] == "Custom message"
63-
assert retrieved[1]["content"] == "Custom response"
64-
65-
def test_agent_adapter_clear_message_history(self, dummy_agent_adapter):
66-
"""Test that message history can be cleared."""
67-
dummy_agent_adapter.run("Test")
68-
assert len(dummy_agent_adapter.get_messages()) > 0
69-
70-
dummy_agent_adapter.clear_message_history()
71-
assert len(dummy_agent_adapter.get_messages()) == 0
72-
73-
def test_agent_adapter_append_to_message_history(self, dummy_agent_adapter):
74-
"""Test that messages can be appended to history."""
75-
dummy_agent_adapter.append_to_message_history("user", "First message")
76-
dummy_agent_adapter.append_to_message_history("assistant", "First response")
77-
78-
history = dummy_agent_adapter.get_messages()
79-
assert len(history) == 2
80-
assert history[0]["content"] == "First message"
81-
assert history[1]["content"] == "First response"
82-
8352
def test_agent_adapter_gather_traces_includes_messages(self, dummy_agent_adapter):
8453
"""Test that gather_traces() includes message history."""
8554
dummy_agent_adapter.run("Test query")
@@ -110,17 +79,15 @@ def test_agent_adapter_gather_config(self, dummy_agent_adapter):
11079
assert config["type"] == "DummyAgentAdapter"
11180

11281
def test_agent_adapter_multiple_runs(self, dummy_agent_adapter):
113-
"""Test that adapter can be run multiple times."""
82+
"""Test that adapter can be run multiple times and history accumulates."""
11483
result1 = dummy_agent_adapter.run("Query 1")
11584
assert "Query 1" in result1
11685

117-
# Clear history for second run
118-
dummy_agent_adapter.clear_message_history()
119-
12086
result2 = dummy_agent_adapter.run("Query 2")
12187
assert "Query 2" in result2
12288

123-
# History should only have second run
89+
# History should have both runs
12490
history = dummy_agent_adapter.get_messages()
125-
assert len(history) == 2
126-
assert history[0]["content"] == "Query 2"
91+
assert len(history) == 4 # 2 messages per run
92+
assert history[0]["content"] == "Query 1"
93+
assert history[2]["content"] == "Query 2"

tests/test_core/test_message_tracing_callback.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ def _run_agent(self, query: str) -> str:
4747
# Normal response without tools
4848
history.add_message(role="assistant", content=response)
4949

50-
# Store history so get_messages() can retrieve it
51-
self.set_message_history(history)
50+
# Store history directly
51+
self.messages = history
5252

5353
return response
5454

0 commit comments

Comments
 (0)