diff --git a/.cursor/rules/hosted-web-contract.mdc b/.cursor/rules/hosted-web-contract.mdc index 2bdc237..001ca41 100644 --- a/.cursor/rules/hosted-web-contract.mdc +++ b/.cursor/rules/hosted-web-contract.mdc @@ -54,7 +54,7 @@ One process pair = one MOTO instance (local or sandbox). Env inputs: - `MOTO_INSTANCE_ID`, `MOTO_BACKEND_HOST`/`HOST`, `MOTO_BACKEND_PORT`/`PORT` - `MOTO_DATA_ROOT`, optional `MOTO_LOG_ROOT`, optional `MOTO_SECRET_NAMESPACE` - optional `MOTO_FRONTEND_STORAGE_PREFIX`, optional `MOTO_CORS_ORIGINS`, optional `MOTO_LM_STUDIO_BASE_URL` -- Default desktop launches bind backend and bundled Vite frontend to loopback and require `MOTO_DESKTOP_API_TOKEN` / `VITE_MOTO_DESKTOP_API_TOKEN` on protected HTTP routes. Desktop WebSockets use one-time tickets minted by authenticated `POST /api/ws-ticket`; hosted generic mode continues to use proxy HMAC auth instead. +- Default desktop launches bind backend and bundled Vite frontend to loopback and require `MOTO_DESKTOP_API_TOKEN` / `VITE_MOTO_DESKTOP_API_TOKEN` on protected HTTP routes, except read-only proof certificate exports (`/api/proofs/{id}/certificate[.lean]`) which may be direct local browser downloads. Desktop WebSockets use one-time tickets minted by authenticated `POST /api/ws-ticket`; hosted generic mode continues to use proxy HMAC auth instead. Hosted sandboxes reuse this exact contract (`MOTO_DATA_ROOT=/app/backend/data`). No separate hosted-only env model. diff --git a/.cursor/rules/part-3-autonomous-research-mode.mdc b/.cursor/rules/part-3-autonomous-research-mode.mdc index 0b4599b..cde0aac 100644 --- a/.cursor/rules/part-3-autonomous-research-mode.mdc +++ b/.cursor/rules/part-3-autonomous-research-mode.mdc @@ -1318,7 +1318,7 @@ This file persists the current workflow state to enable **automatic resume** aft - Before completed-paper proof verification (`paper_phase="paper_proof_verification"`) - **During Tier 3 final answer generation phases** -On **clean stop** (user-initiated via stop button), this file is preserved for pause/resume. Only `clear_all_data()` should clear workflow state. `_save_workflow_state()` must preserve the previous `paper_phase` when called without an explicit phase, and only clear the phase when passed `phase=None` intentionally after successful completion. +On **clean stop** (user-initiated via stop button), this file is preserved for pause/resume. Only `clear_all_data()` should clear workflow state. `clear_all_data()` preserves completed session files for history, marks existing sessions non-resumable/history-only, clears pending child-aggregator queue state, and resets live memory path bindings so the next Start creates a fresh session. `_save_workflow_state()` must preserve the previous `paper_phase` when called without an explicit phase, and only clear the phase when passed `phase=None` intentionally after successful completion. On **restart/crash recovery**, if this file exists with a resumable tier/topic/paper (regardless of `is_running`), the system detects an interrupted workflow and: 1. Restores internal state (topic ID, acceptance counts, model config, etc.) @@ -1327,14 +1327,14 @@ On **restart/crash recovery**, if this file exists with a resumable tier/topic/p 4. Detects completed papers paused before proof verification and resumes `paper_proof_verification` before moving on 5. Broadcasts `auto_research_resumed` WebSocket event -If `workflow_state.json` is stale, idle, or missing, session recovery must conservatively synthesize a resume point from durable `session_stats.json`, brainstorm metadata/database files, and in-progress paper metadata/content. This includes scanning `papers/*_metadata.json` for `status="in_progress"` when stats lost `current_paper_id`; the resume phase is detected from saved paper content rather than defaulting to body. +If `workflow_state.json` is stale, idle, or missing, session recovery must conservatively synthesize a resume point from durable `session_stats.json`, brainstorm metadata/database files, and in-progress paper metadata/content unless the session metadata is marked non-resumable/history-only. This includes scanning `papers/*_metadata.json` for `status="in_progress"` when stats lost `current_paper_id`; the resume phase is detected from saved paper content rather than defaulting to body. **Important Notes:** - The user research prompt is saved in `auto_research_metadata.json`, not the workflow state - Model configuration is saved to allow resuming with the same model settings -- If the workflow state file is corrupted or missing, first try durable session-file recovery; start fresh only if no current topic, in-progress paper, completed unpapered brainstorm, completed papers, or active Tier 3 state can be recovered -- The `clear_all_data` API endpoint clears the workflow state along with all other data +- If the workflow state file is corrupted or missing, first try durable session-file recovery; start fresh only if no current topic, in-progress paper, completed unpapered brainstorm, completed papers, or active Tier 3 state can be recovered, and only when the session is not marked non-resumable/history-only +- The `clear_all_data` API endpoint preserves session files for history, marks sessions `resume_disabled=true` / `status="cleared"`, and must fail if any session cannot be marked non-resumable --- diff --git a/backend/api/middleware.py b/backend/api/middleware.py index 1099165..ab6a3f2 100644 --- a/backend/api/middleware.py +++ b/backend/api/middleware.py @@ -3,6 +3,7 @@ """ import hmac import os +import re from urllib.parse import urlparse from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware @@ -31,6 +32,16 @@ ] DESKTOP_API_TOKEN_HEADER = "X-Moto-Desktop-Token" UNSAFE_HTTP_METHODS = {"POST", "PUT", "PATCH", "DELETE"} +DESKTOP_PUBLIC_PROOF_EXPORT_RE = re.compile(r"^/api/proofs/[^/]+/certificate(?:\.lean)?$") + + +def _is_desktop_public_export(method: str, path: str) -> bool: + """Allow direct local browser downloads for read-only generated artifacts.""" + normalized_method = (method or "").upper() + normalized_path = path or "" + if normalized_method not in {"GET", "HEAD"}: + return False + return bool(DESKTOP_PUBLIC_PROOF_EXPORT_RE.fullmatch(normalized_path)) def _origin_from_url(value: str) -> str: @@ -45,6 +56,8 @@ def _validate_desktop_token(request: Request, allowed_origins: list[str]) -> Non """Require the launcher-provided desktop API token outside public routes.""" if is_proxy_auth_allowlisted(request.method, request.url.path): return + if _is_desktop_public_export(request.method, request.url.path): + return expected = (system_config.desktop_api_token or "").strip() if not expected: diff --git a/backend/api/routes/proofs.py b/backend/api/routes/proofs.py index f46333d..a757965 100644 --- a/backend/api/routes/proofs.py +++ b/backend/api/routes/proofs.py @@ -51,6 +51,23 @@ def _safe_path_label(path_value: str) -> str: return "[configured]" +async def _get_export_proof_or_404(proof_id: str): + try: + proof = await proof_database.get_proof(proof_id) + except ValueError: + raise HTTPException(status_code=404, detail="Proof not found") + if proof is None: + raise HTTPException(status_code=404, detail="Proof not found") + return proof + + +async def _get_export_lean_code(proof_id: str) -> str: + try: + return await proof_database.get_lean_code(proof_id) + except ValueError: + raise HTTPException(status_code=404, detail="Proof not found") + + def _build_model_config(role: ProofRoleConfigSnapshot) -> ModelConfig: return ModelConfig( provider=role.provider, @@ -530,21 +547,19 @@ async def get_library_proof(session_id: str, proof_id: str): @router.get("/{proof_id}/certificate") async def get_proof_certificate(proof_id: str): """Return a machine-readable proof certificate JSON payload.""" - proof = await proof_database.get_proof(proof_id) - if proof is None: - raise HTTPException(status_code=404, detail="Proof not found") + proof = await _get_export_proof_or_404(proof_id) lean_version = "" mathlib_commit = "" if system_config.lean4_enabled: try: client = get_lean4_client() - lean_version = await client.get_version() + lean_version = await asyncio.wait_for(client.get_version(), timeout=5.0) mathlib_commit = client.get_mathlib_commit() - except Exception: - pass + except (asyncio.TimeoutError, Exception) as exc: + logger.warning("Lean 4 certificate metadata lookup timed out or failed: %s", exc) - lean_code = await proof_database.get_lean_code(proof_id) + lean_code = await _get_export_lean_code(proof_id) payload = { "proof_id": proof.proof_id, "theorem_statement": proof.theorem_statement, @@ -574,11 +589,9 @@ async def get_proof_certificate(proof_id: str): @router.get("/{proof_id}/certificate.lean") async def get_proof_certificate_lean(proof_id: str): """Return the raw saved Lean file for a proof.""" - proof = await proof_database.get_proof(proof_id) - if proof is None: - raise HTTPException(status_code=404, detail="Proof not found") + proof = await _get_export_proof_or_404(proof_id) - lean_code = await proof_database.get_lean_code(proof_id) + lean_code = await _get_export_lean_code(proof_id) return PlainTextResponse( content=lean_code or proof.lean_code, headers={ diff --git a/backend/autonomous/core/autonomous_coordinator.py b/backend/autonomous/core/autonomous_coordinator.py index 3805d1c..4d5c764 100644 --- a/backend/autonomous/core/autonomous_coordinator.py +++ b/backend/autonomous/core/autonomous_coordinator.py @@ -6646,9 +6646,11 @@ async def clear_all_data(self) -> None: if self._running or self._state.is_running: raise RuntimeError("Cannot clear data while running") + import json import shutil import time from pathlib import Path + from backend.aggregator.core.queue_manager import queue_manager # Wait briefly for any pending async file operations to complete await asyncio.sleep(0.3) @@ -6677,12 +6679,57 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: raise return False - # Step 0: Clear all session workflow states (prevents resume from old sessions) + # Step 0: Make existing sessions history-only so completed work stays + # browsable but durable recovery will not restart it as live work. try: sessions_dir = Path(system_config.auto_sessions_base_dir) + cleared_session_count = 0 + session_mark_failures = [] if sessions_dir.exists(): for session_dir in sessions_dir.iterdir(): if session_dir.is_dir(): + now = datetime.now().isoformat() + metadata_path = session_dir / "session_metadata.json" + metadata = {} + if metadata_path.exists(): + try: + async with aiofiles.open(metadata_path, 'r', encoding='utf-8') as f: + raw_metadata = await f.read() + metadata = json.loads(raw_metadata) if raw_metadata.strip() else {} + except Exception as e: + logger.warning(f"Could not read session metadata for {session_dir.name}: {e}") + + metadata.setdefault("session_id", session_dir.name) + if not metadata.get("user_prompt") and metadata.get("user_research_prompt"): + metadata["user_prompt"] = metadata.get("user_research_prompt") + metadata["status"] = "cleared" + metadata["resume_disabled"] = True + metadata["cleared_at"] = now + metadata["last_updated"] = now + + try: + async with aiofiles.open(metadata_path, 'w', encoding='utf-8') as f: + await f.write(json.dumps(metadata, indent=2)) + cleared_session_count += 1 + except Exception as e: + message = f"Could not mark session as cleared for {session_dir.name}: {e}" + session_mark_failures.append(message) + logger.error(message) + + stats_path = session_dir / "session_stats.json" + if stats_path.exists(): + try: + async with aiofiles.open(stats_path, 'r', encoding='utf-8') as f: + raw_stats = await f.read() + stats = json.loads(raw_stats) if raw_stats.strip() else {} + stats["current_brainstorm_id"] = None + stats["current_paper_id"] = None + stats["last_updated"] = now + async with aiofiles.open(stats_path, 'w', encoding='utf-8') as f: + await f.write(json.dumps(stats, indent=2)) + except Exception as e: + logger.warning(f"Could not clear active stats for {session_dir.name}: {e}") + workflow_state_file = session_dir / "workflow_state.json" if workflow_state_file.exists(): try: @@ -6691,10 +6738,33 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: except Exception as e: # Non-critical: workflow state files are small logger.warning(f"Could not clear workflow state for {session_dir.name}: {e}") - logger.info("Cleared all session workflow states") + if session_mark_failures: + critical_errors.append( + "Failed to mark one or more sessions non-resumable: " + + "; ".join(session_mark_failures) + ) + else: + successes.append(f"Marked {cleared_session_count} session(s) as history-only") + logger.info("Marked session histories as non-resumable and cleared workflow states") except Exception as e: - errors.append(f"Failed to clear session workflow states: {e}") - logger.error(errors[-1]) + critical_errors.append(f"Failed to mark sessions history-only: {e}") + logger.error(critical_errors[-1]) + + # Step 0b: Reset live path bindings before clearing legacy state. + # Session files remain as history; current Stage 1/2 views should read + # from the empty legacy roots until the next Start creates a new session. + try: + await session_manager.clear() + brainstorm_memory.set_session_manager(None) + paper_library.set_session_manager(None) + research_metadata.set_session_manager(None) + final_answer_memory.set_session_manager(None) + proof_database.set_session_manager(None) + successes.append("Reset live session path bindings") + logger.info("Reset live session path bindings after clear") + except Exception as e: + errors.append(f"Failed to reset live session path bindings: {e}") + logger.warning(errors[-1]) # Step 1: Clear brainstorms directory try: @@ -6774,6 +6844,15 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: # Critical: RAG state affects future operations critical_errors.append(f"Failed to clear RAG state: {e}") logger.error(critical_errors[-1]) + + # Step 7b: Clear any queued submissions left by cancelled child aggregators. + try: + await queue_manager.clear() + successes.append("Cleared pending submission queue") + logger.info("Cleared pending submission queue") + except Exception as e: + errors.append(f"Failed to clear pending submission queue: {e}") + logger.warning(errors[-1]) # Step 8: Reset internal state self._current_topic_id = None @@ -6801,16 +6880,6 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: # Step 9: Reset state object self._state = AutonomousResearchState() - # Step 10: Clear session manager state - try: - await session_manager.clear() - successes.append("Cleared session manager state") - logger.info("Cleared session manager state") - except Exception as e: - # Non-critical: session manager will reset on next start - errors.append(f"Failed to clear session manager: {e}") - logger.warning(errors[-1]) - # Report results with graceful degradation success_count = len(successes) error_count = len(errors) diff --git a/backend/autonomous/memory/brainstorm_memory.py b/backend/autonomous/memory/brainstorm_memory.py index a43fd76..7fbf069 100644 --- a/backend/autonomous/memory/brainstorm_memory.py +++ b/backend/autonomous/memory/brainstorm_memory.py @@ -39,6 +39,9 @@ def set_session_manager(self, session_manager) -> None: if session_manager and session_manager.is_session_active: self._base_dir = session_manager.get_brainstorms_dir() logger.info(f"Brainstorm memory using session path: {self._base_dir}") + else: + self._base_dir = Path(system_config.auto_brainstorms_dir) + logger.info(f"Brainstorm memory using legacy path: {self._base_dir}") async def initialize(self) -> None: """Initialize the brainstorm memory directory.""" diff --git a/backend/autonomous/memory/final_answer_memory.py b/backend/autonomous/memory/final_answer_memory.py index b0b4d5a..74487b6 100644 --- a/backend/autonomous/memory/final_answer_memory.py +++ b/backend/autonomous/memory/final_answer_memory.py @@ -178,6 +178,15 @@ def set_session_manager(self, session_manager) -> None: self._rejections_path = self._base_dir / "tier3_rejections.txt" self._final_volume_path = self._base_dir / "final_volume.txt" logger.info(f"Final answer memory using session path: {self._base_dir}") + else: + self._base_dir = Path(system_config.data_dir) / "auto_final_answer" + self._state_path = self._base_dir / "final_answer_state.json" + self._volume_path = self._base_dir / "volume_organization.json" + self._rejections_path = self._base_dir / "tier3_rejections.txt" + self._final_volume_path = self._base_dir / "final_volume.txt" + logger.info(f"Final answer memory using legacy path: {self._base_dir}") + + self._state = None async def initialize(self) -> None: """Initialize the final answer memory directories and load state.""" diff --git a/backend/autonomous/memory/paper_library.py b/backend/autonomous/memory/paper_library.py index c8b6bb1..e72080a 100644 --- a/backend/autonomous/memory/paper_library.py +++ b/backend/autonomous/memory/paper_library.py @@ -48,6 +48,11 @@ def set_session_manager(self, session_manager) -> None: self._archive_dir = session_manager.get_papers_dir() / "archive" self._pruned_dir = session_manager.get_papers_dir() / "pruned" logger.info("Paper library using session path: %s", redact_log_text(self._base_dir, 240)) + else: + self._base_dir = Path(system_config.auto_papers_dir) + self._archive_dir = Path(system_config.auto_papers_archive_dir) + self._pruned_dir = self._base_dir / "pruned" + logger.info("Paper library using legacy path: %s", redact_log_text(self._base_dir, 240)) async def initialize(self) -> None: """Initialize the paper library directories.""" diff --git a/backend/autonomous/memory/research_metadata.py b/backend/autonomous/memory/research_metadata.py index ee076f5..fee0c8b 100644 --- a/backend/autonomous/memory/research_metadata.py +++ b/backend/autonomous/memory/research_metadata.py @@ -50,6 +50,15 @@ def set_session_manager(self, session_manager) -> None: self._stats_path = session_path / "session_stats.json" self._workflow_state_path = session_path / "workflow_state.json" logger.info(f"Research metadata using session path: {session_path}") + else: + self._metadata_path = Path(system_config.auto_research_metadata_file) + self._stats_path = Path(system_config.auto_research_stats_file) + self._workflow_state_path = Path(system_config.auto_workflow_state_file) + logger.info("Research metadata using legacy paths") + + self._data = None + self._stats = None + self._workflow_state = None def _get_default_stats(self) -> Dict[str, Any]: """Default statistics structure.""" diff --git a/backend/autonomous/memory/session_manager.py b/backend/autonomous/memory/session_manager.py index f166c98..d3f9ff1 100644 --- a/backend/autonomous/memory/session_manager.py +++ b/backend/autonomous/memory/session_manager.py @@ -21,6 +21,9 @@ logger = logging.getLogger(__name__) +NON_RESUMABLE_SESSION_STATUSES = {"cleared", "history_only", "archived", "complete"} + + def _session_paper_has_section(content: str, section_name: str) -> bool: base_patterns = [ rf"##\s*{section_name}", @@ -230,6 +233,17 @@ async def resume_session(self, session_id: str, base_dir: Optional[str] = None) if metadata_path.exists(): async with aiofiles.open(metadata_path, 'r', encoding='utf-8') as f: metadata = json.loads(await f.read()) + session_status = str(metadata.get("status", "")).lower() + if metadata.get("resume_disabled") or session_status in NON_RESUMABLE_SESSION_STATUSES: + logger.error( + "Refusing to resume non-resumable session: %s (status=%s)", + session_id, + session_status or "unknown", + ) + self._session_path = None + self._user_prompt = None + self._session_id = None + return None self._user_prompt = metadata.get("user_prompt", "") self._session_id = metadata.get("session_id", session_id) else: @@ -345,7 +359,24 @@ async def find_interrupted_session(self, base_dir: Optional[str] = None) -> Opti workflow_state_path = session_dir / "workflow_state.json" workflow_state = None + session_metadata = {} + user_prompt = "" try: + session_metadata_path = session_dir / "session_metadata.json" + if session_metadata_path.exists(): + async with aiofiles.open(session_metadata_path, 'r', encoding='utf-8') as f: + session_metadata = json.loads(await f.read()) + user_prompt = session_metadata.get("user_prompt", "") or session_metadata.get("user_research_prompt", "") + + session_status = str(session_metadata.get("status", "")).lower() + if session_metadata.get("resume_disabled") or session_status in NON_RESUMABLE_SESSION_STATUSES: + logger.debug( + "Skipping non-resumable session %s (status=%s)", + session_dir.name, + session_status or "unknown", + ) + continue + if workflow_state_path.exists(): async with aiofiles.open(workflow_state_path, 'r', encoding='utf-8') as f: raw = await f.read() @@ -371,14 +402,6 @@ async def find_interrupted_session(self, base_dir: Optional[str] = None) -> Opti continue if has_tier and (has_topic or has_papers): - # Load session metadata for user prompt - session_metadata_path = session_dir / "session_metadata.json" - user_prompt = "" - if session_metadata_path.exists(): - async with aiofiles.open(session_metadata_path, 'r', encoding='utf-8') as f: - session_metadata = json.loads(await f.read()) - user_prompt = session_metadata.get("user_prompt", "") - resumable_sessions.append({ "session_id": session_dir.name, "path": str(session_dir), diff --git a/frontend/src/components/autonomous/MathematicalProofs.jsx b/frontend/src/components/autonomous/MathematicalProofs.jsx index 4dfca8a..e568b4f 100644 --- a/frontend/src/components/autonomous/MathematicalProofs.jsx +++ b/frontend/src/components/autonomous/MathematicalProofs.jsx @@ -5,6 +5,7 @@ import { buildCurrentProofRuntimeConfig, isProofRuntimeConfigComplete, } from '../../hooks/useProofCheckRuntime'; +import { downloadTextFile } from '../../utils/downloadHelpers'; function formatDate(isoString) { if (!isoString) { @@ -419,6 +420,31 @@ function MathematicalProofs({ api, refreshToken = 0, selectedProofId = null, lat } }; + const handleDownloadLeanProof = async (proof) => { + try { + const leanCode = await api.getProofLeanSource(proof.proof_id); + if (!leanCode) { + throw new Error('Lean source is unavailable for this proof.'); + } + downloadTextFile(leanCode, `${proof.proof_id}.lean`, 'text/plain'); + } catch (err) { + setError(`Failed to download Lean proof: ${err.message}`); + } + }; + + const handleDownloadCertificate = async (proof) => { + try { + const certificate = await api.getProofCertificate(proof.proof_id); + downloadTextFile( + JSON.stringify(certificate, null, 2), + `${proof.proof_id}_certificate.json`, + 'application/json' + ); + } catch (err) { + setError(`Failed to download proof certificate: ${err.message}`); + } + }; + return (
@@ -646,13 +672,13 @@ function MathematicalProofs({ api, refreshToken = 0, selectedProofId = null, lat
- handleDownloadLeanProof(proof)} > Download .lean - +
{proof.theorem_name && ( diff --git a/frontend/src/services/api.js b/frontend/src/services/api.js index 62a2223..c368ade 100644 --- a/frontend/src/services/api.js +++ b/frontend/src/services/api.js @@ -592,13 +592,20 @@ export const autonomousAPI = { return response.json(); }, - // Download URLs for machine-readable proof certificates - getProofCertificateUrl(proofId) { - return `${API_BASE}/proofs/${encodeURIComponent(proofId)}/certificate`; + async getProofCertificate(proofId) { + const response = await fetch(`${API_BASE}/proofs/${encodeURIComponent(proofId)}/certificate`); + if (!response.ok) { + await throwFromResponse(response, `Failed to get proof certificate for ${proofId}`); + } + return response.json(); }, - getProofLeanDownloadUrl(proofId) { - return `${API_BASE}/proofs/${encodeURIComponent(proofId)}/certificate.lean`; + async getProofLeanSource(proofId) { + const response = await fetch(`${API_BASE}/proofs/${encodeURIComponent(proofId)}/certificate.lean`); + if (!response.ok) { + await throwFromResponse(response, `Failed to get Lean source for ${proofId}`); + } + return response.text(); }, async getProofLibrary(novelOnly = true) {