Skip to content

Commit d37eb65

Browse files
author
Tooru
committed
Coderabbit fixes
1 parent 8e5bb4c commit d37eb65

10 files changed

Lines changed: 120 additions & 69 deletions

File tree

gui/qa/main.js

Lines changed: 69 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,13 @@ function listenToRun(runId) {
244244
};
245245

246246
socket.onmessage = (event) => {
247-
const data = JSON.parse(event.data);
247+
let data;
248+
try {
249+
data = JSON.parse(event.data);
250+
} catch (e) {
251+
console.error('Failed to parse WebSocket message', e);
252+
return;
253+
}
248254
switch (data.type) {
249255
case 'init':
250256
setupRunView(data.metadata, runId);
@@ -329,19 +335,25 @@ function updateAttemptRow(event) {
329335
row = document.createElement('tr');
330336
row.classList.add('fade-in');
331337
const levelLabel = levelKey && levelKey !== 'base' ? levelKey : '—';
332-
row.innerHTML = `
333-
<td>${event.question_number ?? '—'}</td>
334-
<td>${event.model || '—'}</td>
335-
<td>${levelLabel}</td>
336-
<td class="status-cell"></td>
337-
<td>-</td>
338-
<td>-</td>
339-
<td>-</td>
340-
<td>-</td>
341-
<td class="breakable"></td>
342-
<td class="breakable"></td>
343-
<td></td>
344-
`;
338+
const cellData = [
339+
{ text: event.question_number ?? '—' },
340+
{ text: event.model || '—' },
341+
{ text: levelLabel },
342+
{ text: '', className: 'status-cell' },
343+
{ text: '-' },
344+
{ text: '-' },
345+
{ text: '-' },
346+
{ text: '-' },
347+
{ text: '', className: 'breakable' },
348+
{ text: '', className: 'breakable' },
349+
{ text: '' },
350+
];
351+
cellData.forEach((cell) => {
352+
const td = document.createElement('td');
353+
td.textContent = cell.text;
354+
if (cell.className) td.className = cell.className;
355+
row.appendChild(td);
356+
});
345357
// Track ordering metadata
346358
row.dataset.question = String(event.question_number ?? '0');
347359
row.dataset.level = levelKey;
@@ -516,15 +528,26 @@ async function refreshLeaderboard() {
516528
const levelLabel = row.thinking_level && row.thinking_level !== 'base'
517529
? row.thinking_level
518530
: '—';
519-
tr.innerHTML = `
520-
<td>${row.model_id}</td>
521-
<td>${formatAccuracy(row.accuracy)}</td>
522-
<td>${formatCost(row.cost_usd)}</td>
523-
<td>${row.duration_seconds != null ? Number(row.duration_seconds).toFixed(2) : '—'}</td>
524-
<td>${row.runs ?? '—'}</td>
525-
<td>${levelLabel}</td>
526-
<td><button class="danger" data-model="${row.model_id}">Clear</button></td>
527-
`;
531+
const cellValues = [
532+
row.model_id,
533+
formatAccuracy(row.accuracy),
534+
formatCost(row.cost_usd),
535+
row.duration_seconds != null ? Number(row.duration_seconds).toFixed(2) : '—',
536+
row.runs ?? '—',
537+
levelLabel,
538+
];
539+
cellValues.forEach((text) => {
540+
const td = document.createElement('td');
541+
td.textContent = text;
542+
tr.appendChild(td);
543+
});
544+
const btnCell = document.createElement('td');
545+
const btn = document.createElement('button');
546+
btn.className = 'danger';
547+
btn.dataset.model = row.model_id;
548+
btn.textContent = 'Clear';
549+
btnCell.appendChild(btn);
550+
tr.appendChild(btnCell);
528551
leaderboardBody.appendChild(tr);
529552
});
530553
} catch (error) {
@@ -575,25 +598,38 @@ async function refreshHistory() {
575598
const rows = Array.isArray(data.runs) ? data.runs : [];
576599
if (!rows.length) {
577600
const emptyRow = document.createElement('tr');
578-
emptyRow.innerHTML = '<td colspan="6" class="empty-state">No question runs yet.</td>';
601+
const emptyTd = document.createElement('td');
602+
emptyTd.colSpan = 6;
603+
emptyTd.className = 'empty-state';
604+
emptyTd.textContent = 'No question runs yet.';
605+
emptyRow.appendChild(emptyTd);
579606
historyBody.appendChild(emptyRow);
580607
return;
581608
}
582609
rows.forEach((row) => {
583610
const tr = document.createElement('tr');
584-
tr.innerHTML = `
585-
<td>${row.run_id}</td>
586-
<td>${formatTimestamp(row.timestamp_utc)}</td>
587-
<td>${row.model_id || '—'}</td>
588-
<td>${formatAccuracy(row.accuracy)}</td>
589-
<td>${formatCost(row.total_cost_usd)}</td>
590-
<td>${row.total_duration_seconds != null ? Number(row.total_duration_seconds).toFixed(2) : '—'}</td>
591-
`;
611+
const cells = [
612+
row.run_id,
613+
formatTimestamp(row.timestamp_utc),
614+
row.model_id || '—',
615+
formatAccuracy(row.accuracy),
616+
formatCost(row.total_cost_usd),
617+
row.total_duration_seconds != null ? Number(row.total_duration_seconds).toFixed(2) : '—',
618+
];
619+
cells.forEach((text) => {
620+
const td = document.createElement('td');
621+
td.textContent = text;
622+
tr.appendChild(td);
623+
});
592624
historyBody.appendChild(tr);
593625
});
594626
} catch (error) {
595627
const row = document.createElement('tr');
596-
row.innerHTML = `<td colspan="6" class="error">Failed to load history: ${error.message}</td>`;
628+
const td = document.createElement('td');
629+
td.colSpan = 6;
630+
td.className = 'error';
631+
td.textContent = `Failed to load history: ${error.message}`;
632+
row.appendChild(td);
597633
historyBody.appendChild(row);
598634
}
599635
}

harness/async_client.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,23 @@ def __init__(
2929
self._timeout = timeout_seconds
3030
self._session: Optional[aiohttp.ClientSession] = None
3131

32+
async def __aenter__(self) -> "ResilientOpenRouterClient":
33+
if self._session is None:
34+
timeout = aiohttp.ClientTimeout(total=self._timeout)
35+
connector = aiohttp.TCPConnector(limit=10)
36+
self._session = aiohttp.ClientSession(timeout=timeout, connector=connector)
37+
return self
38+
39+
async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
40+
await self.close()
41+
3242
@asynccontextmanager
3343
async def session(self) -> AsyncIterator[aiohttp.ClientSession]:
3444
if self._session is None:
3545
timeout = aiohttp.ClientTimeout(total=self._timeout)
3646
connector = aiohttp.TCPConnector(limit=10)
3747
self._session = aiohttp.ClientSession(timeout=timeout, connector=connector)
38-
try:
39-
yield self._session
40-
finally:
41-
# caller is responsible for closing via close()
42-
...
48+
yield self._session
4349

4450
async def close(self) -> None:
4551
if self._session is not None:

harness/run_harness.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ def wrap_content(content: str):
637637
else:
638638
try:
639639
data = response.json()
640-
except (requests.exceptions.JSONDecodeError, ValueError) as exc:
640+
except (requests.exceptions.JSONDecodeError, ValueError):
641641
content_type = response.headers.get("content-type", "unknown")
642642
body_preview = response.text.strip()
643643
if len(body_preview) > 512:
@@ -801,8 +801,8 @@ def _run_patch_command(args: List[str], patch_bytes: bytes, workspace_path: Path
801801
check=False,
802802
timeout=timeout,
803803
)
804-
except subprocess.TimeoutExpired:
805-
raise HarnessError("Failed to apply patch: patch command timed out")
804+
except subprocess.TimeoutExpired as exc:
805+
raise HarnessError("Failed to apply patch: patch command timed out") from exc
806806

807807

808808
def _normalize_patch_path(path: str) -> Optional[str]:
@@ -829,7 +829,6 @@ def _extract_full_file_rewrites(cleaned_patch: str) -> Optional[Dict[str, str]]:
829829
if not line.startswith('--- '):
830830
i += 1
831831
continue
832-
old_path = line[4:].strip()
833832
i += 1
834833
if i >= len(lines) or not lines[i].startswith('+++ '):
835834
return None

harness/secure_execution.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
from __future__ import annotations
44

5+
import logging
56
import resource
67
import subprocess
78
from pathlib import Path
89
from typing import Iterable, Optional
910

11+
logger = logging.getLogger(__name__)
12+
1013

1114
def _resolve_command(command: Iterable[str]) -> list[str]:
1215
resolved: list[str] = []
@@ -38,11 +41,11 @@ def set_limits() -> None:
3841
try:
3942
resource.setrlimit(resource.RLIMIT_AS, (memory_bytes, memory_bytes))
4043
except (ValueError, OSError):
41-
pass
44+
logger.warning("Failed to set memory limit to %dMB", max_memory_mb)
4245
try:
4346
resource.setrlimit(resource.RLIMIT_CPU, (timeout, timeout + 1))
4447
except (ValueError, OSError):
45-
pass
48+
logger.warning("Failed to set CPU limit to %ds", timeout)
4649

4750
process_env = env.copy() if env else None
4851

server/caching.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import asyncio
66
from dataclasses import dataclass
7-
from datetime import datetime, timedelta
7+
from datetime import datetime, timedelta, timezone
88
from typing import Any, Dict, Optional
99

1010

@@ -14,7 +14,7 @@ class CacheEntry:
1414
expires_at: datetime
1515

1616
def is_expired(self) -> bool:
17-
return datetime.utcnow() > self.expires_at
17+
return datetime.now(timezone.utc) > self.expires_at
1818

1919

2020
class AsyncCache:
@@ -33,7 +33,7 @@ async def get(self, key: str) -> Optional[Any]:
3333

3434
async def set(self, key: str, value: Any, ttl_seconds: int = 300) -> None:
3535
async with self._lock:
36-
expires_at = datetime.utcnow() + timedelta(seconds=ttl_seconds)
36+
expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)
3737
self._cache[key] = CacheEntry(value=value, expires_at=expires_at)
3838

3939
async def invalidate(self, key: str) -> None:

server/database.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import json
55
from contextlib import contextmanager
66
from pathlib import Path
7-
from typing import Any, Dict, Iterable, List, Optional
7+
from typing import Any, Dict, Iterable, Iterator, List, Optional
88

99
from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, Text, create_engine, select
1010
from sqlalchemy.orm import Mapped, Session, declarative_base, mapped_column, relationship, sessionmaker
@@ -70,7 +70,7 @@ def init_db() -> None:
7070

7171

7272
@contextmanager
73-
def get_session() -> Session:
73+
def get_session() -> Iterator[Session]:
7474
session = SessionLocal()
7575
try:
7676
yield session
@@ -233,8 +233,6 @@ def _is_better(candidate: Dict[str, Any], incumbent: Optional[Dict[str, Any]]) -
233233
if not attempts:
234234
continue
235235
default_level = summary.get("thinking_level")
236-
metrics = summary.get("metrics") or {}
237-
accuracy_map = metrics.get("model_accuracy") or {}
238236
# Group attempts per (model, thinking level)
239237
per_model_level: Dict[tuple[str, str], List[Dict[str, Any]]] = {}
240238
for attempt in attempts:

server/qa_database.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ class QAAttemptRecord(Base):
6666

6767
engine = create_engine(
6868
f"sqlite:///{DB_PATH}",
69-
pool_size=settings.database.pool_size,
70-
max_overflow=settings.database.max_overflow,
7169
echo=settings.database.echo,
7270
)
7371
SessionLocal = sessionmaker(bind=engine, expire_on_commit=False, class_=Session)

server/qa_progress.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import asyncio
66
import datetime as dt
7+
import threading
78
import uuid
89
from typing import Any, Dict
910

@@ -12,6 +13,7 @@ class ProgressManager:
1213
def __init__(self) -> None:
1314
self._runs: Dict[str, Dict[str, Any]] = {}
1415
self._lock = asyncio.Lock()
16+
self._sync_lock = threading.RLock()
1517

1618
def generate_run_id(self) -> str:
1719
timestamp = dt.datetime.now(dt.timezone.utc).strftime("qa_%Y%m%dT%H%M%SZ")
@@ -52,28 +54,31 @@ async def unsubscribe(self, run_id: str, queue: asyncio.Queue) -> None:
5254
del self._runs[run_id]
5355

5456
def _publish(self, run_id: str, event: Dict[str, Any]) -> None:
55-
run = self._runs.get(run_id)
56-
if not run:
57-
return
58-
run["events"].append(event)
59-
for queue in list(run["queues"]):
60-
queue.put_nowait(event)
57+
with self._sync_lock:
58+
run = self._runs.get(run_id)
59+
if not run:
60+
return
61+
run["events"].append(event)
62+
for queue in list(run["queues"]):
63+
queue.put_nowait(event)
6164

6265
def publish_attempt(self, run_id: str, event: Dict[str, Any]) -> None:
6366
payload = {"type": "attempt", **event}
6467
self._publish(run_id, payload)
6568

6669
def complete(self, run_id: str, summary: Dict[str, Any]) -> None:
67-
self._publish(run_id, {"type": "complete", "summary": summary})
68-
run = self._runs.get(run_id)
69-
if run:
70-
run["done"] = True
70+
with self._sync_lock:
71+
self._publish(run_id, {"type": "complete", "summary": summary})
72+
run = self._runs.get(run_id)
73+
if run:
74+
run["done"] = True
7175

7276
def fail(self, run_id: str, message: str) -> None:
73-
self._publish(run_id, {"type": "error", "message": message})
74-
run = self._runs.get(run_id)
75-
if run:
76-
run["done"] = True
77+
with self._sync_lock:
78+
self._publish(run_id, {"type": "error", "message": message})
79+
run = self._runs.get(run_id)
80+
if run:
81+
run["done"] = True
7782

7883

7984
qa_progress_manager = ProgressManager()

server/routes/qa_router.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
logger = structlog.get_logger(__name__)
2626

2727
router = APIRouter(prefix="/qa", tags=["qa"])
28+
_background_tasks: set = set()
2829

2930

3031
class QARunLaunchResponse(BaseModel):
@@ -221,7 +222,9 @@ async def runner() -> None:
221222
save_run(summary)
222223
qa_progress_manager.complete(run_id, summary)
223224

224-
asyncio.create_task(runner())
225+
task = asyncio.create_task(runner())
226+
_background_tasks.add(task)
227+
task.add_done_callback(_background_tasks.discard)
225228
return QARunLaunchResponse(run_id=run_id)
226229

227230

server/routes/router.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
settings = get_settings()
2424
logger = structlog.get_logger(__name__)
2525
HARNESS_SETTINGS = get_harness_settings()
26+
_background_tasks: set = set()
2627
DEFAULT_ALLOW_INCOMPLETE_DIFFS = HARNESS_SETTINGS.allow_incomplete_diffs
2728
DEFAULT_ALLOW_DIFF_REWRITE_FALLBACK = HARNESS_SETTINGS.allow_diff_rewrite_fallback
2829
DEFAULT_MAX_TOKENS = HARNESS_SETTINGS.default_max_tokens
@@ -203,7 +204,9 @@ async def runner() -> None:
203204
database.save_run(summary)
204205
progress_manager.complete(run_id, summary)
205206

206-
asyncio.create_task(runner())
207+
task = asyncio.create_task(runner())
208+
_background_tasks.add(task)
209+
task.add_done_callback(_background_tasks.discard)
207210
return RunLaunchResponse(run_id=run_id)
208211

209212

0 commit comments

Comments
 (0)