forked from OpenHands/OpenHands
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathtest_empty_image_url_fix_v2.py
More file actions
267 lines (229 loc) · 10.6 KB
/
test_empty_image_url_fix_v2.py
File metadata and controls
267 lines (229 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
"""Test for fixing empty image URL issue in multimodal browsing."""
from openhands.core.config.agent_config import AgentConfig
from openhands.core.message import ImageContent
from openhands.events.observation.browse import BrowserOutputObservation
from openhands.memory.conversation_memory import ConversationMemory
from openhands.utils.prompt import PromptManager
def test_empty_image_url_handling():
    """Check a browser observation whose screenshot and set_of_marks are empty.

    The observation is passed through
    ``ConversationMemory._process_observation`` with ``vision_is_active`` and
    ``enable_som_visual_browsing`` both set to True. The test passes only when
    the returned messages contain no ``ImageContent`` part and at least one
    text part mentions that visual information is missing or was filtered.
    """
    # Both image-bearing fields are deliberately empty strings.
    observation = BrowserOutputObservation(
        url='https://example.com',
        trigger_by_action='browse_interactive',
        screenshot='',
        set_of_marks='',
        content='Some webpage content',
    )

    memory = ConversationMemory(
        AgentConfig(enable_som_visual_browsing=True),
        PromptManager(prompt_dir='openhands/agenthub/codeact_agent/prompts'),
    )
    processed = memory._process_observation(
        obs=observation,
        tool_call_id_to_message={},
        max_message_chars=None,
        vision_is_active=True,
        enable_som_visual_browsing=True,
        current_index=0,
        events=[],
    )

    # Either phrase counts as the "visual information missing" notification.
    notice_markers = ('No visual information', 'has been filtered')
    saw_image = False
    saw_notice = False

    # Visit every content part of every message, in order.
    for part in (item for msg in processed for item in msg.content):
        if isinstance(part, ImageContent):
            saw_image = True
            # Any image part that does appear must still hold usable URLs.
            for image_url in part.image_urls:
                assert image_url != '', 'Empty image URL should be filtered out'
                assert image_url is not None, (
                    'None image URL should be filtered out'
                )
                # Falsy values skip the prefix check; anything else must use
                # the data: scheme.
                assert not image_url or image_url.startswith('data:'), (
                    f'Invalid image URL format: {image_url}'
                )
            continue

        if not hasattr(part, 'text'):
            continue
        if any(marker in part.text for marker in notice_markers):
            saw_notice = True

    # No image parts are expected, but the notification text is.
    assert not saw_image, 'Should not have ImageContent for empty images'
    assert saw_notice, (
        'Should have notification text about missing visual information'
    )
def test_valid_image_url_handling():
    """Check that a browser observation with valid image data keeps its images.

    The same base64 PNG data URL is used for both ``screenshot`` and
    ``set_of_marks``. After ``_process_observation`` runs with vision enabled,
    at least one ``ImageContent`` part must be present, each such part must
    hold at least one URL, and every URL must be non-empty and start with
    ``data:image/``.
    """
    png_data_url = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
    observation = BrowserOutputObservation(
        url='https://example.com',
        trigger_by_action='browse_interactive',
        screenshot=png_data_url,
        set_of_marks=png_data_url,
        content='Some webpage content',
    )

    memory = ConversationMemory(
        AgentConfig(enable_som_visual_browsing=True),
        PromptManager(prompt_dir='openhands/agenthub/codeact_agent/prompts'),
    )
    processed = memory._process_observation(
        obs=observation,
        tool_call_id_to_message={},
        max_message_chars=None,
        vision_is_active=True,
        enable_som_visual_browsing=True,
        current_index=0,
        events=[],
    )

    saw_image = False
    # Visit every content part of every message, in order.
    for part in (item for msg in processed for item in msg.content):
        if not isinstance(part, ImageContent):
            continue
        saw_image = True
        # An image part with no URLs at all would be useless.
        assert len(part.image_urls) >= 1, 'Should have at least one image URL'
        for image_url in part.image_urls:
            assert image_url != '', 'Image URL should not be empty'
            assert image_url.startswith('data:image/'), (
                f'Invalid image URL format: {image_url}'
            )

    assert saw_image, 'Should have found ImageContent with valid URLs'
def test_mixed_image_url_handling():
    """Check a browser observation with an empty screenshot and valid set_of_marks.

    After ``_process_observation`` runs with vision and set-of-marks browsing
    enabled, at least one ``ImageContent`` part must be present, and every
    such part must contain exactly one URL equal to the valid data URL that
    was supplied as ``set_of_marks``.
    """
    png_data_url = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
    # Only set_of_marks carries image data; screenshot is an empty string.
    observation = BrowserOutputObservation(
        url='https://example.com',
        trigger_by_action='browse_interactive',
        screenshot='',
        set_of_marks=png_data_url,
        content='Some webpage content',
    )

    memory = ConversationMemory(
        AgentConfig(enable_som_visual_browsing=True),
        PromptManager(prompt_dir='openhands/agenthub/codeact_agent/prompts'),
    )
    processed = memory._process_observation(
        obs=observation,
        tool_call_id_to_message={},
        max_message_chars=None,
        vision_is_active=True,
        enable_som_visual_browsing=True,
        current_index=0,
        events=[],
    )

    saw_image = False
    # Visit every content part of every message, in order.
    for part in (item for msg in processed for item in msg.content):
        if not isinstance(part, ImageContent):
            continue
        saw_image = True
        # The empty screenshot must be dropped, leaving only set_of_marks.
        assert len(part.image_urls) == 1, (
            f'Should have exactly one image URL, got {len(part.image_urls)}'
        )
        kept_url = part.image_urls[0]
        assert kept_url == png_data_url, (
            f'Should use the valid image URL: {kept_url}'
        )

    assert saw_image, 'Should have found ImageContent with valid URL'
def test_ipython_empty_image_url_handling():
    """Check an IPython observation whose image URLs are all empty or None.

    The observation is built with ``image_urls=['', None, '']`` and passed
    through ``_process_observation`` with vision enabled. The test passes only
    when no ``ImageContent`` part is produced and some text part contains the
    phrase ``'invalid or empty and have been filtered'``.
    """
    from openhands.events.observation.commands import IPythonRunCellObservation

    # None of the supplied entries is a usable image URL.
    observation = IPythonRunCellObservation(
        content='Some output',
        code='print("hello")',
        image_urls=['', None, ''],
    )

    memory = ConversationMemory(
        AgentConfig(enable_som_visual_browsing=True),
        PromptManager(prompt_dir='openhands/agenthub/codeact_agent/prompts'),
    )
    processed = memory._process_observation(
        obs=observation,
        tool_call_id_to_message={},
        max_message_chars=None,
        vision_is_active=True,
        enable_som_visual_browsing=True,
        current_index=0,
        events=[],
    )

    saw_image = False
    saw_notice = False
    # Visit every content part of every message, in order.
    for part in (item for msg in processed for item in msg.content):
        if isinstance(part, ImageContent):
            saw_image = True
        elif (
            hasattr(part, 'text')
            and 'invalid or empty and have been filtered' in part.text
        ):
            saw_notice = True

    # No image parts are expected, but the notification text is.
    assert not saw_image, 'Should not have ImageContent for empty images'
    assert saw_notice, 'Should have notification text about filtered images'
def test_ipython_mixed_image_url_handling():
    """Check an IPython observation mixing empty, valid, and None image URLs.

    After ``_process_observation`` runs with vision enabled, at least one
    ``ImageContent`` part must be present, every such part must hold exactly
    one URL equal to the valid data URL, and some text part must contain the
    phrase ``'invalid or empty image(s) were filtered'``.
    """
    from openhands.events.observation.commands import IPythonRunCellObservation

    png_data_url = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
    # Only the middle entry is a usable image URL.
    observation = IPythonRunCellObservation(
        content='Some output',
        code='print("hello")',
        image_urls=['', png_data_url, None],
    )

    memory = ConversationMemory(
        AgentConfig(enable_som_visual_browsing=True),
        PromptManager(prompt_dir='openhands/agenthub/codeact_agent/prompts'),
    )
    processed = memory._process_observation(
        obs=observation,
        tool_call_id_to_message={},
        max_message_chars=None,
        vision_is_active=True,
        enable_som_visual_browsing=True,
        current_index=0,
        events=[],
    )

    saw_image = False
    saw_notice = False
    # Visit every content part of every message, in order.
    for part in (item for msg in processed for item in msg.content):
        if isinstance(part, ImageContent):
            saw_image = True
            # The empty and None entries must be dropped, leaving one URL.
            assert len(part.image_urls) == 1, (
                f'Should have exactly one image URL, got {len(part.image_urls)}'
            )
            kept_url = part.image_urls[0]
            assert kept_url == png_data_url, (
                f'Should use the valid image URL: {kept_url}'
            )
        elif (
            hasattr(part, 'text')
            and 'invalid or empty image(s) were filtered' in part.text
        ):
            saw_notice = True

    assert saw_image, 'Should have found ImageContent with valid URL'
    assert saw_notice, 'Should have notification text about filtered images'