Skip to content

Commit 6005000

Browse files
committed
Remove timeout waiting for NDJSON; change workflow timeout to 30 minutes
Signed-off-by: Lior Sventitzky <liorsve@amazon.com>
1 parent e7fc12f commit 6005000

2 files changed

Lines changed: 8 additions & 18 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ env:
6565
jobs:
6666
benchmark:
6767
runs-on: [self-hosted, Linux, x86, ephemeral, metal]
68-
timeout-minutes: 120
68+
timeout-minutes: 30
6969

7070
steps:
7171
- uses: actions/checkout@v4

.github/workflows/benchmark_orchestrator.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -349,25 +349,10 @@ def __init__(self, metrics_path: Path):
349349
self.watcher_thread: Optional[threading.Thread] = None
350350
self.phase_records: dict = {}
351351

352-
def start(self, timeout_seconds: int = 600,
353-
benchmark_proc: Optional[subprocess.Popen] = None):
352+
def start(self, benchmark_proc: Optional[subprocess.Popen] = None):
354353
print(f"Waiting for metrics file: {self.metrics_path}")
355-
start_time = time.time()
354+
last_status_time = time.time()
356355
while not self.metrics_path.exists():
357-
if time.time() - start_time > timeout_seconds:
358-
# Try to capture benchmark output if process provided
359-
if benchmark_proc:
360-
benchmark_proc.terminate()
361-
try:
362-
stdout, stderr = benchmark_proc.communicate(timeout=5)
363-
print("=== Benchmark stdout ===")
364-
print(stdout.decode() if stdout else "(empty)")
365-
print("=== Benchmark stderr ===")
366-
print(stderr.decode() if stderr else "(empty)")
367-
except Exception as e:
368-
print(f"Could not capture benchmark output: {e}")
369-
raise RuntimeError(
370-
f"Timeout waiting for metrics file: {self.metrics_path}")
371356
# Check if benchmark process died
372357
if benchmark_proc and benchmark_proc.poll() is not None:
373358
stdout, stderr = benchmark_proc.communicate()
@@ -378,6 +363,11 @@ def start(self, timeout_seconds: int = 600,
378363
print(stderr.decode() if stderr else "(empty)")
379364
raise RuntimeError(
380365
f"Benchmark process died with exit code {benchmark_proc.returncode}")
366+
# Print status every 30 seconds
367+
if time.time() - last_status_time > 30:
368+
elapsed = int(time.time() - last_status_time)
369+
print(f"Still waiting for metrics file... (benchmark process running)")
370+
last_status_time = time.time()
381371
time.sleep(0.1)
382372

383373
self.tail_process = subprocess.Popen(

0 commit comments

Comments
 (0)