Skip to content

Commit 76804fd

Browse files
tmux human text compression.
1 parent 7892486 commit 76804fd

3 files changed

Lines changed: 360 additions & 2 deletions

File tree

src/smolagents/bp_tools_tmux.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import subprocess
2424
import time
2525

26+
from .bp_utils_readable_compress import readable_compress
2627
from .tools import Tool
2728

2829

@@ -205,14 +206,21 @@ class TmuxReadTool(Tool):
205206
"description": "If True (default), return only new lines since the last read. If False, return the full tail.",
206207
"nullable": True,
207208
},
209+
"compress": {
210+
"type": "boolean",
211+
"description": "If True (default), apply readable compression to the output (strip ANSI, collapse duplicates, remove progress lines, strip timestamps). Defaults to True.",
212+
"nullable": True,
213+
},
208214
}
209215
output_type = "string"
210216

211-
def forward(self, session_name: str, lines: int | None = None, incremental: bool | None = None) -> str:
217+
def forward(self, session_name: str, lines: int | None = None, incremental: bool | None = None, compress: bool | None = None) -> str:
212218
if lines is None:
213219
lines = 20
214220
if incremental is None:
215221
incremental = True
222+
if compress is None:
223+
compress = True
216224
lines = min(lines, _MAX_READ_LINES)
217225
full = _full_name(session_name)
218226
# Always capture the full scrollback; slice in Python afterwards.
@@ -263,7 +271,10 @@ def forward(self, session_name: str, lines: int | None = None, incremental: bool
263271
# Cap at max requested lines.
264272
new_lines = new_lines[-lines:]
265273
_last_read[session_name] = _make_fingerprint(all_lines)
266-
return "\n".join(new_lines)
274+
text = "\n".join(new_lines)
275+
if compress:
276+
text = readable_compress(text)
277+
return text
267278

268279

269280
# ======================================================================
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
# BPSA - Beyond Python SmolAgents
2+
# https://github.com/joaopauloschuler/beyond-python-smolagents
3+
#
4+
# Copyright (c) 2024-2026 Joao Paulo Schwarz Schuler and others.
5+
# Refer to the git commit history for individual authorship.
6+
# Licensed under the Apache License, Version 2.0
7+
8+
"""
9+
Readable text compression for agent output.
10+
11+
Compresses text while keeping it human-readable. Designed for tmux
12+
scrollback and similar command output where verbosity wastes context
13+
tokens without adding information.
14+
15+
Passes (in order):
16+
1. ANSI escape stripping
17+
2. Whitespace normalization (trailing spaces, blank-line runs)
18+
3. Exact consecutive duplicate line collapsing
19+
4. Progress line removal (keep only the final state)
20+
5. Timestamp prefix stripping
21+
"""
22+
23+
import re
24+
25+
# ---------------------------------------------------------------------------
26+
# 1. ANSI escape stripping
27+
# ---------------------------------------------------------------------------
28+
29+
# Matches CSI sequences (e.g. colours, cursor moves) and OSC sequences.
30+
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\x1b\][^\x07]*\x07|\x1b\[[\d;]*m")
31+
32+
33+
def _strip_ansi(text: str) -> str:
34+
"""Remove ANSI escape sequences."""
35+
return _ANSI_RE.sub("", text)
36+
37+
38+
# ---------------------------------------------------------------------------
39+
# 2. Whitespace normalization
40+
# ---------------------------------------------------------------------------
41+
42+
43+
def _normalize_whitespace(lines: list[str]) -> list[str]:
44+
"""Strip trailing whitespace per line and collapse runs of blank lines."""
45+
result: list[str] = []
46+
prev_blank = False
47+
for line in lines:
48+
stripped = line.rstrip()
49+
is_blank = stripped == ""
50+
if is_blank and prev_blank:
51+
continue
52+
result.append(stripped)
53+
prev_blank = is_blank
54+
return result
55+
56+
57+
# ---------------------------------------------------------------------------
58+
# 3. Exact consecutive duplicate collapsing
59+
# ---------------------------------------------------------------------------
60+
61+
62+
def _collapse_repeated_lines(lines: list[str]) -> list[str]:
63+
"""Collapse runs of identical consecutive lines."""
64+
if not lines:
65+
return lines
66+
result: list[str] = []
67+
run_line = lines[0]
68+
run_count = 1
69+
for line in lines[1:]:
70+
if line == run_line:
71+
run_count += 1
72+
else:
73+
_flush_run(result, run_line, run_count)
74+
run_line = line
75+
run_count = 1
76+
_flush_run(result, run_line, run_count)
77+
return result
78+
79+
80+
def _flush_run(result: list[str], line: str, count: int) -> None:
81+
"""Append a run to *result*, summarising if count > 2."""
82+
if count <= 2:
83+
result.extend([line] * count)
84+
else:
85+
result.append(line)
86+
result.append(f" ... (repeated {count - 1} more times)")
87+
88+
89+
# ---------------------------------------------------------------------------
90+
# 4. Progress line removal
91+
# ---------------------------------------------------------------------------
92+
93+
# Matches lines that look like progress indicators:
94+
# - contain a percentage (e.g. " 45%", "100%")
95+
# - or a progress bar (e.g. [#### ], [=====> ])
96+
# - or common spinners (|, /, -, \)
97+
_PERCENT_RE = re.compile(r"\d{1,3}%")
98+
_PROGRESS_BAR_RE = re.compile(r"\[[\s#=\->]+\]")
99+
_SPINNER_RE = re.compile(r"^[\s]*[|/\\\-][\s]*$")
100+
101+
102+
def _collapse_progress_lines(lines: list[str]) -> list[str]:
103+
"""Remove intermediate progress lines, keeping only the last in a run."""
104+
if not lines:
105+
return lines
106+
result: list[str] = []
107+
progress_run: list[str] = []
108+
for line in lines:
109+
if _is_progress_line(line):
110+
progress_run.append(line)
111+
else:
112+
if progress_run:
113+
result.append(progress_run[-1])
114+
progress_run = []
115+
result.append(line)
116+
if progress_run:
117+
result.append(progress_run[-1])
118+
return result
119+
120+
121+
def _is_progress_line(line: str) -> bool:
122+
"""Heuristic: does *line* look like a progress indicator?"""
123+
if _SPINNER_RE.match(line):
124+
return True
125+
if _PERCENT_RE.search(line) and _PROGRESS_BAR_RE.search(line):
126+
return True
127+
return False
128+
129+
130+
# ---------------------------------------------------------------------------
131+
# 5. Timestamp prefix stripping
132+
# ---------------------------------------------------------------------------
133+
134+
# Common log timestamp patterns at the start of a line:
135+
# 2026-04-10T12:00:01.123Z | 2026-04-10 12:00:01 | [12:00:01] |
136+
# Apr 10 12:00:01
137+
_TIMESTAMP_RE = re.compile(
138+
r"^("
139+
r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}[\.\d]*[Z]?\s*"
140+
r"|\[\d{2}:\d{2}:\d{2}\]\s*"
141+
r"|[A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s*"
142+
r")"
143+
)
144+
145+
146+
def _strip_timestamps(lines: list[str]) -> list[str]:
147+
"""Strip common timestamp prefixes and prepend a note if any were found."""
148+
stripped: list[str] = []
149+
count = 0
150+
for line in lines:
151+
new_line, n = _TIMESTAMP_RE.subn("", line, count=1)
152+
count += n
153+
stripped.append(new_line)
154+
if count > 0:
155+
stripped.insert(0, f"[timestamps stripped from {count} lines]")
156+
return stripped
157+
158+
159+
# ---------------------------------------------------------------------------
160+
# Public API
161+
# ---------------------------------------------------------------------------
162+
163+
164+
def readable_compress(text: str) -> str:
    """Run every readable-compression pass over *text*.

    ANSI escapes are removed from the whole string first; the text is then
    split into lines and fed through the line-based passes in a fixed
    order before being joined back with newlines.

    Args:
        text: Raw command / scrollback output.

    Returns:
        The compressed, still human-readable text.
    """
    lines = _strip_ansi(text).splitlines()
    line_passes = (
        _normalize_whitespace,
        _collapse_repeated_lines,
        _collapse_progress_lines,
        _strip_timestamps,
    )
    for run_pass in line_passes:
        lines = run_pass(lines)
    return "\n".join(lines)
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Unit tests for bp_utils_readable_compress.
4+
"""
5+
6+
import os
7+
import sys
8+
9+
import pytest
10+
11+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
12+
13+
from smolagents.bp_utils_readable_compress import (
14+
readable_compress,
15+
_strip_ansi,
16+
_normalize_whitespace,
17+
_collapse_repeated_lines,
18+
_collapse_progress_lines,
19+
_strip_timestamps,
20+
)
21+
22+
23+
# ── ANSI stripping ──────────────────────────────────────────────────────
24+
25+
class TestStripAnsi:
    """Checks for the ANSI escape-stripping pass."""

    def test_removes_colour_codes(self):
        # SGR colour on / reset around a word.
        coloured = "\x1b[31mERROR\x1b[0m"
        assert _strip_ansi(coloured) == "ERROR"

    def test_removes_cursor_move(self):
        # CSI sequence ending in a letter other than "m".
        moved = "\x1b[2Ahello"
        assert _strip_ansi(moved) == "hello"

    def test_plain_text_unchanged(self):
        plain = "hello world"
        assert _strip_ansi(plain) == "hello world"

    def test_osc_sequence(self):
        # OSC sequence terminated by BEL.
        titled = "\x1b]0;title\x07rest"
        assert _strip_ansi(titled) == "rest"
37+
38+
39+
# ── Whitespace normalization ────────────────────────────────────────────
40+
41+
class TestNormalizeWhitespace:
    """Checks for trailing-space stripping and blank-line squeezing."""

    def test_strips_trailing_spaces(self):
        padded = ["hello ", "world "]
        assert _normalize_whitespace(padded) == ["hello", "world"]

    def test_collapses_blank_runs(self):
        # Three blank lines in a row shrink to one.
        with_gap = ["a", "", "", "", "b"]
        expected = ["a", "", "b"]
        assert _normalize_whitespace(with_gap) == expected

    def test_single_blank_preserved(self):
        single_gap = ["a", "", "b"]
        expected = ["a", "", "b"]
        assert _normalize_whitespace(single_gap) == expected
52+
53+
54+
# ── Duplicate collapsing ────────────────────────────────────────────────
55+
56+
class TestCollapseRepeatedLines:
    """Checks for collapsing runs of identical consecutive lines."""

    def test_no_repeats(self):
        distinct = ["a", "b", "c"]
        assert _collapse_repeated_lines(distinct) == ["a", "b", "c"]

    def test_two_repeats_kept(self):
        # A run of exactly two is short enough to be left alone.
        pair_then_other = ["a", "a", "b"]
        assert _collapse_repeated_lines(pair_then_other) == ["a", "a", "b"]

    def test_many_repeats_collapsed(self):
        collapsed = _collapse_repeated_lines(["x"] * 50)
        assert len(collapsed) == 2
        head, note = collapsed
        assert head == "x"
        assert "49 more" in note

    def test_empty_input(self):
        assert _collapse_repeated_lines([]) == []

    def test_mixed_runs(self):
        # Run of three collapses; run of two and the single line do not.
        mixed = ["a", "a", "a", "b", "b", "c"]
        expected = ["a", " ... (repeated 2 more times)", "b", "b", "c"]
        assert _collapse_repeated_lines(mixed) == expected
79+
80+
81+
# ── Progress line removal ──────────────────────────────────────────────
82+
83+
class TestCollapseProgressLines:
    """Checks for keeping only the last line of a progress run."""

    def test_percentage_with_bar(self):
        download_log = [
            "Downloading [## ] 20%",
            "Downloading [##### ] 50%",
            "Downloading [##########] 100%",
            "Done.",
        ]
        expected = ["Downloading [##########] 100%", "Done."]
        assert _collapse_progress_lines(download_log) == expected

    def test_spinner_lines(self):
        # Four spinner frames in a row: only the last frame survives.
        frames_then_text = ["|", "/", "-", "\\", "finished"]
        assert _collapse_progress_lines(frames_then_text) == ["\\", "finished"]

    def test_normal_lines_unchanged(self):
        lines = ["compiling foo.c", "compiling bar.c"]
        assert _collapse_progress_lines(lines) == lines

    def test_percentage_without_bar_not_matched(self):
        # A percentage with no bracketed bar is ordinary text.
        lines = ["Test passed: 100% coverage"]
        assert _collapse_progress_lines(lines) == lines

    def test_empty(self):
        assert _collapse_progress_lines([]) == []
109+
110+
111+
# ── Timestamp stripping ─────────────────────────────────────────────────
112+
113+
class TestStripTimestamps:
    """Checks for timestamp-prefix stripping and the summary note."""

    def test_iso_timestamp(self):
        stamped = ["2026-04-10T12:00:01Z hello", "2026-04-10T12:00:02Z world"]
        output = _strip_timestamps(stamped)
        # Index 0 is the inserted note; the payload lines follow in order.
        assert output[0].startswith("[timestamps stripped")
        assert "hello" in output[1]
        assert "world" in output[2]

    def test_bracketed_time(self):
        output = _strip_timestamps(["[12:00:01] info message"])
        assert len(output) == 2
        assert "info message" in output[1]

    def test_syslog_timestamp(self):
        output = _strip_timestamps(["Apr 10 12:00:01 server msg"])
        assert "server msg" in output[1]

    def test_no_timestamps(self):
        # Nothing stripped means no note line is added.
        untouched = _strip_timestamps(["hello", "world"])
        assert untouched == ["hello", "world"]
136+
137+
138+
# ── Full pipeline ───────────────────────────────────────────────────────
139+
140+
class TestReadableCompress:
    """End-to-end checks for the public ``readable_compress`` entry point."""

    def test_full_pipeline(self):
        pieces = [
            "\x1b[32m2026-04-10T12:00:01Z Starting build\x1b[0m\n",
            "2026-04-10T12:00:02Z Compiling\n",
            "Downloading [## ] 20%\n",
            "Downloading [##### ] 50%\n",
            "Downloading [##########] 100%\n",
            "line\n",
            "line\n",
            "line\n",
            "line\n",
            "line\n",
            "\n",
            "\n",
            "\n",
            "done",
        ]
        compressed = readable_compress("".join(pieces))
        # ANSI stripped
        assert "\x1b" not in compressed
        # Timestamps stripped
        assert "[timestamps stripped" in compressed
        # Progress collapsed: only the 100% line is kept
        assert "20%" not in compressed
        assert "100%" in compressed
        # Duplicate lines collapsed
        assert "repeated" in compressed
        # Blank line run collapsed
        assert "\n\n\n" not in compressed
        # Content preserved
        assert "done" in compressed

    def test_plain_text_passthrough(self):
        plain = "hello\nworld"
        assert readable_compress(plain) == "hello\nworld"

0 commit comments

Comments
 (0)