Skip to content

Commit f612822

Browse files
fix: prevent sticky progress bar ghost lines from terminal wrapping (#565)
* fix: prevent sticky progress bar ghost lines from terminal wrapping When a formatted bar line exceeded the terminal width, it wrapped to multiple physical lines but _drawn_lines only counted 1 per bar. Subsequent cursor-up clears missed the wrapped portions, leaving ghost lines that accumulated every redraw cycle. - Count physical lines in _redraw based on visible width vs terminal width - Cap rate at 9999.9 and eta at 999s to prevent stats field overflow - Remove max(10, ...) bar_width floor; degrade gracefully on narrow terminals - Add update_many() for batch updates with a single redraw cycle - Use update_many() in AsyncProgressReporter to reduce N redraws to 1 * test: strengthen wrapping and degradation tests per review feedback - Monkeypatch shutil.get_terminal_size to force narrow terminal in tests - Inject oversized lines via _format_bar patch to exercise physical line counting (ceiling division) code path - Assert output line width <= width-1 in graceful degradation test - Assert _drawn_lines == 1 in degradation mode (no false wrapping) * test: flatten test classes and replace private attribute assertions Address review feedback: use flat pytest functions per DEVELOPMENT.md conventions instead of class-based test suites. Move inline imports to module level. Replace most _drawn_lines and _bars assertions with public output proxies (counting CURSOR_UP_CLEAR sequences and checking rendered bar content). Keep _drawn_lines access only where no clean public proxy exists (multi-checkpoint add/remove test, zero-bars-remaining after log_final). * refactor: expose drawn_lines as public read-only property Replace _drawn_lines access in tests with a public property, consistent with the existing is_active pattern.
1 parent 956b8cd commit f612822

3 files changed

Lines changed: 271 additions & 5 deletions

File tree

packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/async_progress_reporter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,11 @@ def _maybe_report(self) -> None:
101101

102102
def _update_bar(self) -> None:
103103
elapsed = time.perf_counter() - self._start_time
104+
updates: dict[str, tuple[int, int, int]] = {}
104105
for col, tracker in self._trackers.items():
105106
completed, _total, success, failed, _skipped, _pct, _rate, _emoji = tracker.get_snapshot(elapsed)
106-
self._bar.update(col, completed=completed, success=success, failed=failed)
107+
updates[col] = (completed, success, failed)
108+
self._bar.update_many(updates)
107109

108110
def _emit(self) -> None:
109111
current_total = sum(tracker.get_snapshot(0.0)[0] for tracker in self._trackers.values())

packages/data-designer-engine/src/data_designer/engine/dataset_builders/utils/sticky_progress_bar.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import annotations
55

66
import logging
7+
import re
78
import shutil
89
import sys
910
import time
@@ -13,6 +14,7 @@
1314

1415
BAR_FILLED = "█"
1516
BAR_EMPTY = "░"
17+
_ANSI_RE = re.compile(r"\033\[[0-9;]*m")
1618

1719

1820
def _compute_stats_width(total: int) -> int:
@@ -67,6 +69,10 @@ def __init__(self, stream: TextIO | None = None) -> None:
6769
def is_active(self) -> bool:
6870
return self._active
6971

72+
@property
73+
def drawn_lines(self) -> int:
74+
return self._drawn_lines
75+
7076
# -- context manager --
7177

7278
def __enter__(self) -> StickyProgressBar:
@@ -108,6 +114,16 @@ def update(
108114
if self._active:
109115
self._redraw()
110116

117+
def update_many(self, updates: dict[str, tuple[int, int, int]]) -> None:
118+
with self._lock:
119+
for key, (completed, success, failed) in updates.items():
120+
if bar := self._bars.get(key):
121+
bar.completed = completed
122+
bar.success = success
123+
bar.failed = failed
124+
if self._active:
125+
self._redraw()
126+
111127
def remove_bar(self, key: str) -> None:
112128
with self._lock:
113129
self._bars.pop(key, None)
@@ -163,23 +179,30 @@ def _redraw(self) -> None:
163179
for bar in self._bars.values():
164180
line = self._format_bar(bar, width, max_label)
165181
self._write(line + "\n")
166-
self._drawn_lines += 1
182+
visible = len(_ANSI_RE.sub("", line))
183+
if width > 0 and visible > width:
184+
self._drawn_lines += (visible + width - 1) // width
185+
else:
186+
self._drawn_lines += 1
167187

168188
def _format_bar(self, bar: _BarState, width: int, label_width: int) -> str:
169189
completed = min(bar.completed, bar.total)
170190
pct = (completed / bar.total * 100) if bar.total > 0 else 100.0
171191
elapsed = time.perf_counter() - bar.start_time
172-
rate = bar.completed / elapsed if elapsed > 0 else 0.0
192+
rate = min(bar.completed / elapsed if elapsed > 0 else 0.0, 9999.9)
173193
remaining = max(0, bar.total - completed)
174-
eta = f"{remaining / rate:.0f}s" if rate > 0 else "?"
194+
eta = f"{min(remaining / rate, 999):.0f}s" if rate > 0 else "?"
175195

176196
label = bar.label.ljust(label_width)
177197
total_w = len(str(bar.total))
178198
count_str = f"{completed:>{total_w}}/{bar.total}"
179199
stats = f" {pct:3.0f}% | {count_str} | {rate:6.1f} rec/s | eta {eta:>4s} | {bar.failed:>{total_w}} failed"
180200
stats = stats.ljust(bar.stats_width)
181201

182-
bar_width = max(10, width - len(label) - bar.stats_width - 4)
202+
bar_width = width - len(label) - bar.stats_width - 4
203+
if bar_width < 1:
204+
return f" {label} {stats}"[: max(0, width - 1)]
205+
183206
filled = int(bar_width * pct / 100)
184207
empty = bar_width - filled
185208

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from __future__ import annotations
5+
6+
import io
7+
import logging
8+
import os
9+
import re
10+
import shutil
11+
from unittest.mock import patch
12+
13+
import pytest
14+
15+
from data_designer.engine.dataset_builders.utils.async_progress_reporter import AsyncProgressReporter
16+
from data_designer.engine.dataset_builders.utils.progress_tracker import ProgressTracker
17+
from data_designer.engine.dataset_builders.utils.sticky_progress_bar import (
18+
StickyProgressBar,
19+
)
20+
21+
CURSOR_UP_CLEAR = "\033[A\033[2K"
22+
HIDE_CURSOR = "\033[?25l"
23+
SHOW_CURSOR = "\033[?25h"
24+
_ALL_ANSI_RE = re.compile(r"\033\[[0-9;?]*[a-zA-Z]")
25+
26+
27+
class FakeTTY(io.StringIO):
28+
"""StringIO that reports itself as a TTY so StickyProgressBar activates."""
29+
30+
def isatty(self) -> bool:
31+
return True
32+
33+
34+
@pytest.fixture
35+
def tty_stream() -> FakeTTY:
36+
return FakeTTY()
37+
38+
39+
def test_no_output_when_not_tty() -> None:
40+
stream = io.StringIO()
41+
with StickyProgressBar(stream=stream) as bar:
42+
bar.add_bar("a", "col_a", 10)
43+
bar.update("a", completed=5, success=5)
44+
assert stream.getvalue() == ""
45+
46+
47+
def test_hides_and_shows_cursor(tty_stream: FakeTTY) -> None:
48+
with StickyProgressBar(stream=tty_stream):
49+
pass
50+
output = tty_stream.getvalue()
51+
assert output.startswith(HIDE_CURSOR)
52+
assert output.endswith(SHOW_CURSOR)
53+
54+
55+
def test_drawn_lines_tracks_add_and_remove(tty_stream: FakeTTY) -> None:
56+
with StickyProgressBar(stream=tty_stream) as bar:
57+
bar.add_bar("a", "col_a", 10)
58+
bar.add_bar("b", "col_b", 10)
59+
bar.add_bar("c", "col_c", 10)
60+
assert bar.drawn_lines == 3
61+
62+
bar.remove_bar("a")
63+
assert bar.drawn_lines == 2
64+
65+
bar.add_bar("d", "col_d", 10)
66+
assert bar.drawn_lines == 3
67+
68+
bar.update("b", completed=5, success=5)
69+
assert bar.drawn_lines == 3
70+
71+
bar.remove_bar("b")
72+
bar.remove_bar("c")
73+
bar.remove_bar("d")
74+
assert bar.drawn_lines == 0
75+
76+
77+
def test_drawn_lines_stable_across_many_updates(tty_stream: FakeTTY) -> None:
78+
with StickyProgressBar(stream=tty_stream) as bar:
79+
bar.add_bar("a", "col_a", 100)
80+
bar.add_bar("b", "col_b", 100)
81+
bar.add_bar("c", "col_c", 100)
82+
for i in range(50):
83+
bar.update("a", completed=i, success=i)
84+
bar.update("b", completed=i, success=i)
85+
bar.update("c", completed=i, success=i)
86+
87+
snapshot = tty_stream.getvalue()
88+
bar.update("a", completed=50, success=50)
89+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 3
90+
91+
92+
def test_log_interleaving_preserves_drawn_lines(tty_stream: FakeTTY) -> None:
93+
root_logger = logging.getLogger()
94+
handler = logging.StreamHandler(tty_stream)
95+
handler.setFormatter(logging.Formatter("%(message)s"))
96+
root_logger.addHandler(handler)
97+
98+
try:
99+
with StickyProgressBar(stream=tty_stream) as bar:
100+
bar.add_bar("x", "col_x", 100)
101+
bar.add_bar("y", "col_y", 100)
102+
bar.add_bar("z", "col_z", 100)
103+
104+
for i in range(20):
105+
bar.update("x", completed=i, success=i)
106+
root_logger.info("log at step %d", i)
107+
bar.update("y", completed=i, success=i)
108+
bar.update("z", completed=i, success=i)
109+
110+
snapshot = tty_stream.getvalue()
111+
bar.update("x", completed=20, success=20)
112+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 3
113+
finally:
114+
root_logger.removeHandler(handler)
115+
116+
117+
def test_wrapping_counts_physical_lines(tty_stream: FakeTTY) -> None:
118+
narrow = os.terminal_size((40, 24))
119+
with patch.object(shutil, "get_terminal_size", return_value=narrow):
120+
with StickyProgressBar(stream=tty_stream) as bar:
121+
bar.add_bar("a", "col_a", 100)
122+
bar.add_bar("b", "col_b", 100)
123+
124+
original_format = bar._format_bar
125+
126+
def oversized_format(b: object, width: int, label_width: int) -> str:
127+
line = original_format(b, width, label_width)
128+
return line + "X" * 20
129+
130+
with patch.object(bar, "_format_bar", side_effect=oversized_format):
131+
bar.update("a", completed=5, success=5)
132+
133+
snapshot = tty_stream.getvalue()
134+
bar.update("b", completed=1, success=1)
135+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) > 2
136+
137+
138+
def test_wrapping_stable_across_updates(tty_stream: FakeTTY) -> None:
139+
narrow = os.terminal_size((40, 24))
140+
with patch.object(shutil, "get_terminal_size", return_value=narrow):
141+
with StickyProgressBar(stream=tty_stream) as bar:
142+
bar.add_bar("a", "col_a", 100)
143+
bar.add_bar("b", "col_b", 100)
144+
145+
snapshot = tty_stream.getvalue()
146+
bar.update("a", completed=0, success=0)
147+
initial_clears = tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR)
148+
149+
for i in range(1, 21):
150+
bar.update("a", completed=i, success=i)
151+
bar.update("b", completed=i, success=i)
152+
153+
snapshot = tty_stream.getvalue()
154+
bar.update("a", completed=21, success=21)
155+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == initial_clears
156+
157+
158+
def test_narrow_terminal_graceful_degradation(tty_stream: FakeTTY) -> None:
159+
narrow = os.terminal_size((30, 24))
160+
with patch.object(shutil, "get_terminal_size", return_value=narrow):
161+
with StickyProgressBar(stream=tty_stream) as bar:
162+
bar.add_bar("a", "column 'verification_1'", 300)
163+
bar.update("a", completed=50, success=50)
164+
165+
snapshot = tty_stream.getvalue()
166+
bar.update("a", completed=51, success=51)
167+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 1
168+
169+
output = tty_stream.getvalue()
170+
clean = _ALL_ANSI_RE.sub("", output).replace("\r", "")
171+
for line in clean.split("\n"):
172+
assert len(line) <= 29
173+
174+
175+
def test_update_many_single_redraw(tty_stream: FakeTTY) -> None:
176+
with StickyProgressBar(stream=tty_stream) as bar:
177+
bar.add_bar("a", "col_a", 100)
178+
bar.add_bar("b", "col_b", 100)
179+
before = tty_stream.getvalue()
180+
181+
bar.update_many({"a": (10, 10, 0), "b": (20, 20, 0)})
182+
after = tty_stream.getvalue()
183+
184+
new_output = after[len(before) :]
185+
assert new_output.count(CURSOR_UP_CLEAR) == 2
186+
187+
clean = _ALL_ANSI_RE.sub("", after)
188+
assert "10/100" in clean
189+
assert "20/100" in clean
190+
191+
192+
def test_update_many_ignores_unknown_keys(tty_stream: FakeTTY) -> None:
193+
with StickyProgressBar(stream=tty_stream) as bar:
194+
bar.add_bar("a", "col_a", 100)
195+
bar.update_many({"a": (10, 10, 0), "unknown": (5, 5, 0)})
196+
197+
clean = _ALL_ANSI_RE.sub("", tty_stream.getvalue())
198+
assert "10/100" in clean
199+
assert "unknown" not in clean
200+
201+
snapshot = tty_stream.getvalue()
202+
bar.update("a", completed=11, success=11)
203+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 1
204+
205+
206+
def test_reporter_updates_and_logs_keep_drawn_lines_in_sync(tty_stream: FakeTTY) -> None:
207+
root_logger = logging.getLogger()
208+
handler = logging.StreamHandler(tty_stream)
209+
handler.setFormatter(logging.Formatter("%(message)s"))
210+
root_logger.addHandler(handler)
211+
212+
try:
213+
bar = StickyProgressBar(stream=tty_stream)
214+
trackers = {
215+
"col_a": ProgressTracker(total_records=100, label="column 'a'", quiet=True),
216+
"col_b": ProgressTracker(total_records=100, label="column 'b'", quiet=True),
217+
"col_c": ProgressTracker(total_records=100, label="column 'c'", quiet=True),
218+
}
219+
220+
with bar:
221+
reporter = AsyncProgressReporter(trackers, report_interval=0.1, progress_bar=bar)
222+
reporter.log_start(num_row_groups=1)
223+
224+
snapshot = tty_stream.getvalue()
225+
reporter.record_success("col_a")
226+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 3
227+
228+
for i in range(49):
229+
if i % 10 == 0:
230+
root_logger.info("Processing batch %d", i)
231+
reporter.record_success("col_b")
232+
reporter.record_success("col_c")
233+
234+
snapshot = tty_stream.getvalue()
235+
reporter.record_success("col_a")
236+
assert tty_stream.getvalue()[len(snapshot) :].count(CURSOR_UP_CLEAR) == 3
237+
238+
reporter.log_final()
239+
assert bar.drawn_lines == 0
240+
finally:
241+
root_logger.removeHandler(handler)

0 commit comments

Comments
 (0)