Skip to content

Commit 17ae0ce

Browse files
abrichrclaude
andcommitted
fix: address round-2 review findings across pipeline and live adapter
Pipeline (run_eval_pipeline.py): - Add timeout=3600 to eval subprocess to prevent indefinite hangs - Guard _ensure_waa_ready against empty vm_ip (skip tunnel reconnect) - Capture demo generation output to prevent thread-interleaved stdout - Make eval_tasks a defensive copy instead of alias Live adapter (live.py): - Decouple _build_type_commands from callers: return body without import prefix, eliminating fragile removeprefix coupling - Escape tab characters in _escape_for_pyautogui Tests (test_waa.py): - Add 18 tests for _escape_for_pyautogui and _build_type_commands covering edge cases: empty text, newlines, tabs, quotes, formulas Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f742fc8 commit 17ae0ce

3 files changed

Lines changed: 154 additions & 12 deletions

File tree

openadapt_evals/adapters/waa/live.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -221,25 +221,27 @@ def _escape_for_pyautogui(text: str) -> str:
221221
text
222222
.replace("\\", "\\\\")
223223
.replace("'", "\\'")
224+
.replace("\t", "\\t")
224225
.replace("\r", "")
225226
)
226227

227228

228229
def _build_type_commands(text: str) -> str:
229-
"""Build pyautogui commands to type text, handling embedded newlines.
230+
"""Build pyautogui command body to type text, handling embedded newlines.
230231
231232
``pyautogui.write()`` cannot handle literal newline characters — the
232233
generated Python command string becomes an unterminated string literal
233234
when executed via ``exec()``. This function splits the text on newlines
234235
and interleaves ``pyautogui.write()`` with ``pyautogui.press('enter')``.
235236
236237
Returns:
237-
A complete Python command string including ``import pyautogui;``.
238+
A pyautogui command body string (without ``import pyautogui;`` prefix).
239+
Callers must prepend the import themselves.
238240
"""
239241
segments = text.split("\n")
240242
if len(segments) == 1:
241243
escaped = _escape_for_pyautogui(text)
242-
return f"import pyautogui; pyautogui.write('{escaped}', interval=0.02)"
244+
return f"pyautogui.write('{escaped}', interval=0.02)"
243245

244246
commands: list[str] = []
245247
for i, seg in enumerate(segments):
@@ -250,8 +252,7 @@ def _build_type_commands(text: str) -> str:
250252
commands.append(f"pyautogui.write('{escaped}', interval=0.02)")
251253
if i < len(segments) - 1:
252254
commands.append("pyautogui.press('enter')")
253-
body = "; ".join(commands) if commands else "pass"
254-
return f"import pyautogui; {body}"
255+
return "; ".join(commands) if commands else "pass"
255256

256257

257258
@dataclass
@@ -1137,7 +1138,7 @@ def _translate_action(self, action: BenchmarkAction) -> str | None:
11371138

11381139
if action.type == "type":
11391140
text = action.text or ""
1140-
type_cmds = _build_type_commands(text)
1141+
type_body = _build_type_commands(text)
11411142
# If target_node_id is set (from type_element), click element first to focus it
11421143
if action.target_node_id is not None:
11431144
elem_id = str(action.target_node_id)
@@ -1149,11 +1150,11 @@ def _translate_action(self, action: BenchmarkAction) -> str | None:
11491150
f"import pyautogui; import time; "
11501151
f"pyautogui.click({cx}, {cy}); "
11511152
f"time.sleep(0.2); "
1152-
+ type_cmds.removeprefix("import pyautogui; ")
1153+
f"{type_body}"
11531154
)
11541155
else:
11551156
logger.warning(f"Element ID '{elem_id}' not found for type_element, typing without focus")
1156-
return type_cmds
1157+
return f"import pyautogui; {type_body}"
11571158

11581159
if action.type == "key":
11591160
return self._translate_key_action(action)

scripts/run_eval_pipeline.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,15 @@ def _generate_demos(
126126
if model:
127127
cmd.extend(["--model", model])
128128

129-
result = subprocess.run(cmd, timeout=600)
129+
result = subprocess.run(cmd, timeout=600, capture_output=True, text=True)
130130
if result.returncode == 0:
131131
print(f"[demos] -> done")
132132
generated.append(task_id)
133133
else:
134134
print(f"[demos] ERROR: exit code {result.returncode}")
135+
if result.stderr:
136+
for line in result.stderr.strip().splitlines()[-3:]:
137+
print(f"[demos] ! {line}")
135138

136139
return generated
137140

@@ -280,12 +283,16 @@ def _ensure_waa_ready(
280283
281284
Recovery sequence:
282285
1. Probe -> OK: return True
283-
2. Reconnect tunnel -> Probe -> OK: return True
286+
2. Reconnect tunnel -> Probe -> OK: return True (skipped if vm_ip is empty)
284287
3. Wait for probe with timeout
285288
"""
286289
if _probe(server) and (evaluate_url is None or _probe(evaluate_url)):
287290
return True
288291

292+
if not vm_ip:
293+
print(" WAA unreachable and no VM IP available for tunnel reconnect")
294+
return False
295+
289296
print(" WAA unreachable, reconnecting tunnel...")
290297
tunnel_manager.stop_all_tunnels()
291298
time.sleep(1)
@@ -469,7 +476,7 @@ def _run_eval(
469476
if demo_path:
470477
cmd.extend(["--demo", str(demo_path.resolve())])
471478

472-
result = subprocess.run(cmd, capture_output=True, text=True)
479+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
473480
elapsed = time.time() - task_start
474481

475482
# Log captured output to a file and print summary
@@ -645,7 +652,7 @@ def main() -> int:
645652
missing_demos = [t for t in recorded_tasks if t not in existing_demos]
646653

647654
# Tasks eligible for eval = those with demos (or will have demos after generation)
648-
eval_tasks = recorded_tasks # all recorded tasks (demos will be generated if missing)
655+
eval_tasks = list(recorded_tasks) # copy; demos will be generated for missing ones
649656

650657
print(f"Pipeline Configuration")
651658
print(f" Recordings: {recordings_dir} ({len(recorded_tasks)} task(s))")

tests/test_waa.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,3 +458,137 @@ def test_no_false_positive_on_generic_fail_safe_text(self):
458458
from openadapt_evals.adapters.waa.live import _is_failsafe_error
459459

460460
assert not _is_failsafe_error("The application has a fail-safe mechanism")
461+
462+
463+
# ---------------------------------------------------------------------------
464+
# _escape_for_pyautogui
465+
# ---------------------------------------------------------------------------
466+
467+
468+
class TestEscapeForPyautogui:
469+
"""Tests for _escape_for_pyautogui text escaping."""
470+
471+
def test_plain_text(self):
472+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
473+
474+
assert _escape_for_pyautogui("hello world") == "hello world"
475+
476+
def test_backslash_escaped(self):
477+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
478+
479+
assert _escape_for_pyautogui("path\\to\\file") == "path\\\\to\\\\file"
480+
481+
def test_single_quote_escaped(self):
482+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
483+
484+
assert _escape_for_pyautogui("it's") == "it\\'s"
485+
486+
def test_tab_escaped(self):
487+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
488+
489+
assert _escape_for_pyautogui("col1\tcol2") == "col1\\tcol2"
490+
491+
def test_carriage_return_stripped(self):
492+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
493+
494+
assert _escape_for_pyautogui("line\r") == "line"
495+
496+
def test_empty_string(self):
497+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
498+
499+
assert _escape_for_pyautogui("") == ""
500+
501+
def test_combined_special_chars(self):
502+
from openadapt_evals.adapters.waa.live import _escape_for_pyautogui
503+
504+
# \r is stripped, so "with\rtabs" → "withtabs"
505+
result = _escape_for_pyautogui("it's a\\path\twith\rtabs")
506+
assert result == "it\\'s a\\\\path\\twithtabs"
507+
508+
509+
# ---------------------------------------------------------------------------
510+
# _build_type_commands
511+
# ---------------------------------------------------------------------------
512+
513+
514+
class TestBuildTypeCommands:
515+
"""Tests for _build_type_commands which handles newlines in type actions."""
516+
517+
def test_simple_text(self):
518+
from openadapt_evals.adapters.waa.live import _build_type_commands
519+
520+
result = _build_type_commands("hello")
521+
assert result == "pyautogui.write('hello', interval=0.02)"
522+
523+
def test_text_with_newline(self):
524+
from openadapt_evals.adapters.waa.live import _build_type_commands
525+
526+
result = _build_type_commands("abc\ndef")
527+
assert "pyautogui.write('abc', interval=0.02)" in result
528+
assert "pyautogui.press('enter')" in result
529+
assert "pyautogui.write('def', interval=0.02)" in result
530+
531+
def test_trailing_newline(self):
532+
from openadapt_evals.adapters.waa.live import _build_type_commands
533+
534+
result = _build_type_commands("abc\n")
535+
assert "pyautogui.write('abc', interval=0.02)" in result
536+
assert "pyautogui.press('enter')" in result
537+
# Should NOT have a write('') after the enter
538+
assert result.endswith("pyautogui.press('enter')")
539+
540+
def test_leading_newline(self):
541+
from openadapt_evals.adapters.waa.live import _build_type_commands
542+
543+
result = _build_type_commands("\nabc")
544+
assert result.startswith("pyautogui.press('enter')")
545+
assert "pyautogui.write('abc', interval=0.02)" in result
546+
547+
def test_just_newline(self):
548+
from openadapt_evals.adapters.waa.live import _build_type_commands
549+
550+
result = _build_type_commands("\n")
551+
assert result == "pyautogui.press('enter')"
552+
553+
def test_empty_string(self):
554+
from openadapt_evals.adapters.waa.live import _build_type_commands
555+
556+
result = _build_type_commands("")
557+
assert "pyautogui.write('', interval=0.02)" in result
558+
559+
def test_multiple_newlines(self):
560+
from openadapt_evals.adapters.waa.live import _build_type_commands
561+
562+
result = _build_type_commands("a\nb\nc")
563+
parts = result.split("; ")
564+
assert parts[0] == "pyautogui.write('a', interval=0.02)"
565+
assert parts[1] == "pyautogui.press('enter')"
566+
assert parts[2] == "pyautogui.write('b', interval=0.02)"
567+
assert parts[3] == "pyautogui.press('enter')"
568+
assert parts[4] == "pyautogui.write('c', interval=0.02)"
569+
570+
def test_special_chars_escaped(self):
571+
from openadapt_evals.adapters.waa.live import _build_type_commands
572+
573+
result = _build_type_commands("it's a\\path")
574+
assert "it\\'s a\\\\path" in result
575+
576+
def test_no_import_prefix(self):
577+
"""_build_type_commands should return body only, no import statement."""
578+
from openadapt_evals.adapters.waa.live import _build_type_commands
579+
580+
result = _build_type_commands("hello")
581+
assert not result.startswith("import ")
582+
583+
def test_formula_with_newlines(self):
584+
"""Reproduces the original LibreOffice Calc bug."""
585+
from openadapt_evals.adapters.waa.live import _build_type_commands
586+
587+
text = "=(Sheet1.C4-Sheet1.C3)/Sheet1.C3\n=(Sheet1.C5-Sheet1.C4)/Sheet1.C4"
588+
result = _build_type_commands(text)
589+
# Should have two write commands separated by enter
590+
assert "pyautogui.write('=(Sheet1.C4-Sheet1.C3)/Sheet1.C3', interval=0.02)" in result
591+
assert "pyautogui.press('enter')" in result
592+
assert "pyautogui.write('=(Sheet1.C5-Sheet1.C4)/Sheet1.C4', interval=0.02)" in result
593+
# No literal newlines in the result
594+
assert "\n" not in result

0 commit comments

Comments
 (0)