Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions backend/glossa_lab/api/research_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,30 +175,39 @@ async def start_loop(
async def event_stream():
"""Run the loop in a thread via a queue, persist + stream per cycle."""
queue: asyncio.Queue[dict[str, Any] | None] = asyncio.Queue()
producer_error: list[Exception] = [] # capture worker thread errors

def _producer() -> None:
    """Runs in worker thread — puts entries on the queue.

    Forwards every entry yielded by ``loop.run()`` to ``queue``. An
    exception raised while iterating is recorded in ``producer_error``
    instead of propagating, and a ``None`` sentinel is always enqueued
    last so the consumer can tell that the producer has finished.
    """
    try:
        for entry in loop.run():
            # NOTE(review): this executes off the event-loop thread, and the
            # asyncio documentation describes asyncio.Queue as not
            # thread-safe. Scheduling the put through the event loop's
            # call_soon_threadsafe() may be required — confirm.
            queue.put_nowait(entry)
    except Exception as exc: # noqa: BLE001
        # Stored rather than re-raised; the consumer inspects
        # producer_error after the sentinel arrives.
        producer_error.append(exc)
    finally:
        queue.put_nowait(None) # sentinel

# Start the producer in a background thread
task = asyncio.get_event_loop().run_in_executor(None, _producer)

import time as _time
t0 = _time.monotonic()
cycles_done = 0
last_experiment = ""
timed_out = False
while True:
# Wait for next entry (with timeout so we don't hang forever)
try:
entry = await asyncio.wait_for(queue.get(), timeout=120)
except asyncio.TimeoutError:
timed_out = True
break

if entry is None: # producer finished
break

cycles_done += 1
last_experiment = entry.get("experiment", last_experiment)
yield f"data: {json.dumps(entry)}\n\n"

# Persist state from async context (no thread issues)
Expand All @@ -214,6 +223,26 @@ def _producer():
# Wait for producer thread to finish
await task

elapsed = _time.monotonic() - t0

# If the loop failed or timed out, emit an error SSE event
if timed_out or producer_error:
reason = "timeout" if timed_out else str(producer_error[0])
err_event = {
"type": "error",
"reason": reason,
"cycles_completed": cycles_done,
"last_experiment": last_experiment,
"elapsed_seconds": round(elapsed, 1),
}
yield f"data: {json.dumps(err_event)}\n\n"
if job_id and db:
try:
await db.update_job_status(job_id, "failed")
except Exception: # noqa: BLE001
pass
return # skip synthesis on failure

# Final persist (before foundation check so history is durable)
await _persist(loop)

Expand Down
89 changes: 89 additions & 0 deletions backend/scripts/cleanup_hypotheses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Delete 6 noise hypotheses from glossa.db.

These are LLM-generated meta-action entries (e.g. "Plan chain",
"Create Hypothesis Chain") that pollute the hypothesis tracker.

Usage:
python backend/scripts/cleanup_hypotheses.py [--db PATH]

Defaults to backend/data/glossa.db relative to repo root.
"""
from __future__ import annotations

import argparse
import sqlite3
import sys
from pathlib import Path

# ID prefixes of the noise rows to delete; matched with ``id LIKE '<prefix>%'``.
NOISE_PREFIXES = [
    "e6ab5842",  # Plan chain
    "b8d69b0b",  # Create Hypothesis Chain
    "0660e244",  # Create Hypothesis for Dravidian
    "4e25f46c",  # Plan experiment chain for Indus
    "bf24fa5e",  # Propose experiment chain for Indus
    "b1f04723",  # Plan an experiment chain for astronomical
]

# This script lives in backend/scripts/, so parents[1] is backend/.
DEFAULT_DB = Path(__file__).resolve().parents[1] / "data" / "glossa.db"


def main(argv: list[str] | None = None) -> int:
    """Delete the hypotheses whose IDs start with one of ``NOISE_PREFIXES``.

    The run has three phases: list the matching rows, delete them by their
    exact IDs in a single committed statement, then re-query each prefix and
    warn if anything is still present.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success (including when nothing matched), ``1`` when the
        database path is not an existing file.

    Raises:
        sqlite3.Error: If a query fails, e.g. the ``hypotheses`` table is
            missing. The connection is closed before the error propagates,
            and any uncommitted delete is discarded.
    """
    parser = argparse.ArgumentParser(description="Delete noise hypotheses")
    parser.add_argument("--db", type=Path, default=DEFAULT_DB,
                        help="Path to glossa.db")
    args = parser.parse_args(argv)

    db_path: Path = args.db
    # is_file() rather than exists(): a directory would pass exists() and
    # then fail inside sqlite3.connect with a far less helpful error.
    if not db_path.is_file():
        print(f"ERROR: database not found at {db_path}", file=sys.stderr)
        return 1

    conn = sqlite3.connect(str(db_path))
    try:
        # Switches the database file to write-ahead-log journaling.
        conn.execute("PRAGMA journal_mode=WAL")
        cur = conn.cursor()

        # Confirm rows exist before deleting
        print("=== Pre-delete check ===")
        found_ids: list[str] = []
        for prefix in NOISE_PREFIXES:
            rows = cur.execute(
                "SELECT id, substr(title, 1, 80) FROM hypotheses WHERE id LIKE ?",
                (prefix + "%",),
            ).fetchall()
            for row_id, title in rows:
                found_ids.append(row_id)
                print(f"  FOUND: {row_id} | {title}")
            if not rows:
                print(f"  NOT FOUND: {prefix}%")

        if not found_ids:
            print("\nNo matching rows — nothing to delete.")
            return 0

        # Delete by the exact IDs reported above, so nothing is removed that
        # was not listed. Only "?" placeholders are interpolated into the SQL
        # text; the ID values themselves are bound parameters.
        placeholders = ",".join("?" for _ in found_ids)
        cur.execute(
            f"DELETE FROM hypotheses WHERE id IN ({placeholders})", found_ids
        )
        deleted = cur.rowcount
        conn.commit()

        # Verify
        print(f"\n=== Deleted {deleted} row(s) ===")
        for prefix in NOISE_PREFIXES:
            remaining = cur.execute(
                "SELECT COUNT(*) FROM hypotheses WHERE id LIKE ?",
                (prefix + "%",),
            ).fetchone()[0]
            if remaining:
                print(f"  WARNING: {prefix}% still has {remaining} row(s)")

        total = cur.execute("SELECT COUNT(*) FROM hypotheses").fetchone()[0]
        print(f"Total hypotheses remaining: {total}")
        return 0
    finally:
        # Runs on every exit path, so the connection is released even when a
        # query raises.
        conn.close()


if __name__ == "__main__":
    sys.exit(main())
Loading