stderr handling

AdamGS · AdamGS · commit 7ca7c99bc17e · 2026-04-16T14:10:22.000+01:00
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py
@@ -3,7 +3,9 @@
 
 """Benchmark binary execution."""
 
+import selectors
 import subprocess
+from collections import deque
 from collections.abc import Callable
 from pathlib import Path
 from typing import final
@@ -126,7 +128,8 @@ def run(
         if self.verbose:
             console.print(f"[dim]$ {' '.join(cmd)}[/dim]")
 
-        results = []
+        results: list[str] = []
+        diagnostic_lines: deque[str] = deque(maxlen=200)  # last 200 non-result lines, for errors
 
         with Progress(
             SpinnerColumn(),
@@ -136,28 +139,63 @@ def run(
         ) as progress:
             _task = progress.add_task(f"Running {self.backend.value} {benchmark.value}...", total=None)
 
+            # Merge stderr into stdout so verbose benchmark logs cannot fill a separate pipe and
+            # block the child process before it emits JSON results.
             process = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
                 text=True,
+                bufsize=1,
             )
 
             assert process.stdout is not None
-            for line in iter(process.stdout.readline, ""):
-                line = line.strip()
-                if line:
-                    results.append(line)
-                    if on_result:
-                        on_result(line)
+            stdout = process.stdout
+            selector = selectors.DefaultSelector()
+            selector.register(stdout, selectors.EVENT_READ)
+
+            try:
+                while selector.get_map():
+                    # NOTE(review): select() only sees the OS pipe. readline() can buffer several
+                    # lines per wake-up; the extra lines are returned on later wake-ups or at EOF,
+                    # so on_result can lag behind the child.
+                    if not selector.select(timeout=0.1):
+                        continue
+
+                    line = stdout.readline()
+                    if line == "":
+                        selector.unregister(stdout)
+                        continue
+
+                    line = line.rstrip()
+                    if not line:
+                        continue
+
+                    if line.startswith("{"):
+                        results.append(line)
+                        if on_result:
+                            on_result(line)
+                    else:
+                        diagnostic_lines.append(line)
+                        console.print(line, markup=False)
+            except BaseException:
+                # Do not leave the child running if a callback fails or the user interrupts.
+                process.kill()
+                process.wait()
+                raise
+            finally:
+                selector.close()
 
             ret_code = process.wait()
 
             if ret_code != 0:
-                stderr = process.stderr.read() if process.stderr else ""
                 console.print(f"[red]Benchmark failed with code {process.returncode}[/red]")
-                if stderr:
-                    console.print(f"[red]{stderr}[/red]")
-                raise RuntimeError(f"Benchmark {self.backend.value} {benchmark.value} failed: {stderr}")
+                diagnostics = "\n".join(diagnostic_lines)
+                if diagnostics:
+                    # Child output is arbitrary text: style it without parsing it as Rich markup.
+                    console.print(diagnostics, style="red", markup=False)
+                raise RuntimeError(
+                    f"Benchmark {self.backend.value} {benchmark.value} failed: {diagnostics}"
+                )
 
         return results
diff --git a/bench-orchestrator/tests/test_executor.py b/bench-orchestrator/tests/test_executor.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright the Vortex contributors
 
+import sys
+import textwrap
 from pathlib import Path
 
 from bench_orchestrator.config import Benchmark, ExecutionBackend, Format
@@ -44,3 +46,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
     assert "--formats" not in cmd
     assert "--queries" in cmd
     assert "1,3" in cmd
+
+
+def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
+    """Only the JSON line is returned and forwarded to ``on_result``; stderr lines are not."""
+    fake_bench_source = textwrap.dedent(
+        f"""\
+        #!{sys.executable}
+        import sys
+
+        print("preparing duckdb tables", file=sys.stderr, flush=True)
+        print('{{"engine":"duckdb","format":"parquet","query":0}}', flush=True)
+        print("finished query 0", file=sys.stderr, flush=True)
+        """
+    )
+    fake_bench = tmp_path / "fake-bench.py"
+    fake_bench.write_text(fake_bench_source)
+    fake_bench.chmod(0o755)
+
+    seen_by_callback: list[str] = []
+    returned = BenchmarkExecutor(fake_bench, ExecutionBackend.DUCKDB).run(
+        benchmark=Benchmark.CLICKBENCH,
+        formats=[Format.PARQUET],
+        iterations=1,
+        on_result=seen_by_callback.append,
+    )
+
+    assert returned == ['{"engine":"duckdb","format":"parquet","query":0}']
+    assert seen_by_callback == returned