From c995606c7fb02ed0a4a9a20358ad396ee1a66cf2 Mon Sep 17 00:00:00 2001 From: Pat Date: Wed, 8 Apr 2026 17:36:44 -0500 Subject: [PATCH 01/32] MOTO v1.0.5: autonomy efficiency upgrade, title exploration, token tracking, and UI updates --- .cursor/rules/api-key-controls.mdc | 30 +- .cursor/rules/json-prompt-design.mdc | 73 +- .cursor/rules/latex-renderer.mdc | 45 +- ...-aggregator-tool-design-specifications.mdc | 6 +- ...-and-part-2-cointeraction-architecture.mdc | 1 + ...t-2-compiler-tool-design-specification.mdc | 30 +- .../rules/part-3-autonomous-research-mode.mdc | 216 +- ...program-directory-and-file-definitions.mdc | 50 +- .../rules/rag-design-for-overall-program.mdc | 45 +- .cursor/rules/workflow-runtime-updates.mdc | 11 + Press to Launch MOTO.bat | 5 +- README.md | 27 +- _moto_internal_launcher.ps1 | 63 +- backend/aggregator/agents/submitter.py | 10 +- backend/aggregator/core/context_allocator.py | 65 +- backend/aggregator/core/coordinator.py | 53 +- backend/aggregator/core/rag_manager.py | 103 +- backend/aggregator/memory/shared_training.py | 11 +- .../aggregator/prompts/submitter_prompts.py | 2 + backend/api/main.py | 8 +- backend/api/routes/aggregator.py | 31 +- backend/api/routes/autonomous.py | 801 ++- backend/api/routes/boost.py | 110 +- backend/api/routes/compiler.py | 56 +- backend/api/routes/openrouter.py | 45 +- backend/api/routes/workflow.py | 7 + .../final_answer/answer_format_selector.py | 5 + .../agents/final_answer/certainty_assessor.py | 65 +- .../agents/final_answer/volume_organizer.py | 5 + .../autonomous/agents/paper_title_selector.py | 59 +- backend/autonomous/agents/topic_selector.py | 15 +- backend/autonomous/agents/topic_validator.py | 48 +- .../autonomous/core/autonomous_coordinator.py | 1068 +++- .../autonomous/core/autonomous_rag_manager.py | 48 +- .../autonomous/memory/brainstorm_memory.py | 166 + .../autonomous/memory/final_answer_memory.py | 2 +- backend/autonomous/memory/paper_library.py | 177 + 
.../autonomous/memory/paper_model_tracker.py | 13 +- backend/autonomous/prompts/__init__.py | 8 +- .../prompts/paper_continuation_prompts.py | 300 + .../paper_title_exploration_prompts.py | 87 + .../autonomous/prompts/paper_title_prompts.py | 15 +- .../prompts/topic_exploration_prompts.py | 78 + backend/autonomous/prompts/topic_prompts.py | 21 +- .../validation/paper_redundancy_checker.py | 4 + backend/compiler/agents/critique_submitter.py | 18 + .../compiler/agents/high_context_submitter.py | 112 +- .../compiler/agents/high_param_submitter.py | 31 +- backend/compiler/core/compiler_coordinator.py | 312 +- backend/compiler/core/compiler_rag_manager.py | 11 +- .../compiler/prompts/construction_prompts.py | 75 +- backend/compiler/prompts/critique_prompts.py | 10 + .../compiler/validation/compiler_validator.py | 226 +- backend/shared/api_client_manager.py | 196 +- backend/shared/boost_manager.py | 4 + backend/shared/config.py | 6 +- backend/shared/critique_prompts.py | 156 +- backend/shared/models.py | 34 +- backend/shared/token_tracker.py | 85 + frontend/package-lock.json | 16 +- frontend/package.json | 2 +- frontend/src/App.jsx | 1115 +++- frontend/src/components/BoostControlModal.css | 90 +- frontend/src/components/BoostControlModal.jsx | 189 +- frontend/src/components/BoostLogs.css | 46 +- frontend/src/components/BoostLogs.jsx | 2 +- .../CreditExhaustionNotificationStack.jsx | 259 + .../components/CritiqueNotificationStack.jsx | 24 +- .../HungConnectionNotificationStack.jsx | 188 + frontend/src/components/LatexRenderer.css | 42 +- frontend/src/components/LatexRenderer.jsx | 4 +- .../src/components/OpenRouterApiKeyModal.jsx | 61 +- .../OpenRouterPrivacyWarningModal.jsx | 39 +- .../src/components/PaperCritiqueModal.jsx | 422 +- .../components/StartupProviderSetupModal.jsx | 163 + frontend/src/components/TextFileUploader.css | 14 +- frontend/src/components/WorkflowPanel.css | 275 +- frontend/src/components/WorkflowPanel.jsx | 117 +- 
.../aggregator/AggregatorInterface.jsx | 16 +- .../components/aggregator/AggregatorLogs.jsx | 13 +- .../aggregator/AggregatorSettings.jsx | 175 +- .../src/components/aggregator/LiveResults.jsx | 5 +- .../autonomous/ArchiveViewerModal.css | 264 + .../autonomous/ArchiveViewerModal.jsx | 197 +- .../autonomous/AutonomousResearch.css | 125 +- .../AutonomousResearchInterface.jsx | 107 +- .../autonomous/AutonomousResearchLogs.jsx | 68 +- .../autonomous/AutonomousResearchSettings.jsx | 1308 ++-- .../components/autonomous/BrainstormList.jsx | 9 +- .../autonomous/FinalAnswerLibrary.css | 156 +- .../autonomous/FinalAnswerLibrary.jsx | 404 +- .../autonomous/LivePaperProgress.jsx | 8 +- .../autonomous/LiveTier3Progress.jsx | 42 +- .../components/autonomous/PaperLibrary.jsx | 6 +- .../autonomous/Stage2PaperHistory.css | 51 + .../autonomous/Stage2PaperHistory.jsx | 534 ++ frontend/src/components/autonomous/index.js | 1 + .../components/compiler/CompilerInterface.jsx | 25 +- .../src/components/compiler/CompilerLogs.jsx | 26 +- .../components/compiler/CompilerSettings.jsx | 260 +- .../src/components/compiler/LivePaper.jsx | 17 +- frontend/src/components/critique-modal.css | 496 ++ frontend/src/components/settings-common.css | 827 +++ frontend/src/index.css | 1039 ++-- frontend/src/services/api.js | 101 +- frontend/src/utils/autonomousProfiles.js | 424 ++ frontend/src/utils/disclaimerHelper.js | 67 + frontend/src/utils/downloadHelpers.js | 14 +- frontend/src/utils/researchRunHistory.js | 129 + package-lock.json | 4 +- package.json | 2 +- random dev log.txt | 5266 ----------------- requirements.txt | 4 +- 113 files changed, 11546 insertions(+), 9076 deletions(-) create mode 100644 .cursor/rules/workflow-runtime-updates.mdc create mode 100644 backend/autonomous/prompts/paper_continuation_prompts.py create mode 100644 backend/autonomous/prompts/paper_title_exploration_prompts.py create mode 100644 backend/autonomous/prompts/topic_exploration_prompts.py create mode 100644 
backend/shared/token_tracker.py create mode 100644 frontend/src/components/CreditExhaustionNotificationStack.jsx create mode 100644 frontend/src/components/HungConnectionNotificationStack.jsx create mode 100644 frontend/src/components/StartupProviderSetupModal.jsx create mode 100644 frontend/src/components/autonomous/ArchiveViewerModal.css create mode 100644 frontend/src/components/autonomous/Stage2PaperHistory.css create mode 100644 frontend/src/components/autonomous/Stage2PaperHistory.jsx create mode 100644 frontend/src/components/critique-modal.css create mode 100644 frontend/src/components/settings-common.css create mode 100644 frontend/src/utils/autonomousProfiles.js create mode 100644 frontend/src/utils/disclaimerHelper.js create mode 100644 frontend/src/utils/researchRunHistory.js delete mode 100644 random dev log.txt diff --git a/.cursor/rules/api-key-controls.mdc b/.cursor/rules/api-key-controls.mdc index f5e74dd..d247fc1 100644 --- a/.cursor/rules/api-key-controls.mdc +++ b/.cursor/rules/api-key-controls.mdc @@ -1,7 +1,6 @@ --- -alwaysApply: true +alwaysApply: false --- - # API Key Controls & Workflow Management System ## Overview @@ -37,11 +36,12 @@ Enables OpenRouter integration with automatic LM Studio fallback, plus a dynamic - App Attribution Headers: `HTTP-Referer: https://intrafere.com/moto-autonomous-home-ai/`, `X-Title: MOTO Deep Research Harness` - Credit exhaustion detection: HTTP 402 OR error messages containing "credit", "insufficient", "balance", "quota", "key limit", "limit exceeded" - Raises `CreditExhaustionError` on exhaustion (no retries). Retries transient errors (max 3). -- Temperature=0.0 default. Stop sequences on all requests: `\n}\n\n`, `\n]\n\n`, `\n}\n\n\n`, `\n]\n\n\n` +- Temperature=0.0 default. No stop sequences (removed — caused premature truncation with certain models). 
#### APIClientManager (`backend/shared/api_client_manager.py`) -- Central router for all API calls: boost check → role's OpenRouter (with permanent fallback) → LM Studio +- Central router for all API calls: boost check → role's OpenRouter (with resettable fallback) → LM Studio - Tracks fallback state per role: `_role_fallback_state: Dict[str, str]` +- `reset_openrouter_fallbacks()`: Resets all roles originally configured for OpenRouter back from LM Studio fallback. Called automatically on API key set, or manually via reset endpoint. - Lazy initialization: OpenRouter client initializes from `rag_config.openrouter_api_key` when first needed **CRITICAL REQUIREMENT - Role Configuration:** @@ -55,7 +55,7 @@ Enables OpenRouter integration with automatic LM Studio fallback, plus a dynamic **Counter Decrement:** `boost_next_count` decrements ONLY on successful boost API calls. Failed/exhausted calls do NOT decrement. -**Permanent Fallback:** Once a role falls back to LM Studio due to credit exhaustion, it NEVER retries OpenRouter for that session. Each role has independent fallback state. If no fallback configured: raises RuntimeError. +**Resettable Fallback:** When a role hits credit exhaustion, it falls back to LM Studio for subsequent calls. User can reset all fallen-back roles via `POST /api/openrouter/reset-exhaustion` or by re-setting the API key (auto-resets). Each role has independent fallback state. If no fallback configured: raises RuntimeError. **Categories from role_id:** - `aggregator_submitter_*` → "Aggregator Submitters" @@ -77,7 +77,7 @@ Enables OpenRouter integration with automatic LM Studio fallback, plus a dynamic Coordinators read actual agent `task_sequence` counters — no prediction. Task IDs exactly match what agents will generate. 
- Aggregator: `agg_sub{N}_{seq:03d}`, `agg_val_{seq:03d}` - Compiler: `comp_hc_{seq:03d}`, `comp_hp_{seq:03d}`, `comp_val_{seq:03d}` -- Autonomous: `auto_ts_{seq:03d}`, `auto_tv_{seq:03d}` +- Autonomous: `auto_te_{seq:03d}`, `auto_tev_{seq:03d}`, `auto_ts_{seq:03d}`, `auto_tv_{seq:03d}` --- @@ -93,11 +93,13 @@ Predictions refresh: after initialization, each task completion, mode switches, ## WebSocket Events -**Workflow:** `workflow_updated` (tasks+mode), `task_started` (task_id), `task_completed` (task_id+sequence) +**Workflow:** `workflow_updated` (tasks+mode), `task_started` (task_id), `task_completed` (task_id+sequence), `token_usage_updated` (total_input, total_output, by_model, elapsed_seconds) + +**Boost:** `boost_enabled` (model_id, provider, context_window, max_output_tokens), `boost_disabled`, `task_boost_toggled` (task_id, boosted), `boost_next_count_updated` (count), `category_boost_toggled` (category, boosted), `boost_credits_exhausted` (task_id, message) -**Boost:** `boost_enabled` (model_id, provider, context_window, max_output_tokens), `boost_disabled`, `task_boost_toggled` (task_id, boosted), `boost_next_count_set` (count), `category_boost_toggled` (category, boosted), `boost_credits_exhausted` (task_id, message) +**Fallback:** `openrouter_fallback` (role_id, reason, message, fallback_model), `openrouter_fallback_failed` (role_id, reason, message), `openrouter_fallbacks_reset` (reset_roles, message) -**Fallback:** `openrouter_fallback` (role_id, reason, message, fallback_model), `openrouter_fallback_failed` (role_id, reason, message) +**Hung Connection:** `hung_connection_alert` (role_id, model, provider, elapsed_minutes, message) — fires after 15 minutes of no API response. Amber notification stack (bottom-left, offset from credit exhaustion stack). Auto-cleared on research stop and fallbacks reset. 
**Rate Limit:** `openrouter_rate_limit` (model, role_id, retry_after, message) @@ -118,11 +120,12 @@ Predictions refresh: after initialization, each task completion, mode switches, - `GET /api/boost/openrouter-models` — Fetch OpenRouter models (Bearer key header) - `GET /api/boost/model-providers?model_id=` — Providers for a model - `GET /api/boost/logs?limit=` — Recent boost logs -- `POST /api/boost/logs/clear` — Clear logs +- `POST /api/boost/clear-logs` — Clear logs ### OpenRouter (`backend/api/routes/openrouter.py`) - `GET /api/openrouter/lm-studio-availability` — LM Studio availability check -- `POST /api/openrouter/set-api-key` — Set and validate global OpenRouter key +- `POST /api/openrouter/set-api-key` — Set and validate global OpenRouter key (auto-resets exhaustion flags) +- `POST /api/openrouter/reset-exhaustion` — Reset all credit exhaustion flags + role fallback states mid-session - `DELETE /api/openrouter/api-key` — Clear key - `GET /api/openrouter/api-key-status` — `{ has_key, enabled }` - `GET /api/openrouter/models` — Available models (also caches free models for rotation) @@ -135,12 +138,13 @@ Predictions refresh: after initialization, each task completion, mode switches, ### Workflow (`backend/api/routes/workflow.py`) - `GET /api/workflow/predictions` — Next 20 predicted tasks - `GET /api/workflow/history?limit=` — Completed tasks +- `GET /api/token-stats` — Cumulative token usage (total_input, total_output, by_model, elapsed_seconds) --- ## Error Handling -**Credit Exhaustion:** HTTP 402 or keywords "credit"/"insufficient"/"balance"/"quota"/"key limit"/"limit exceeded" → `CreditExhaustionError` → permanent LM Studio fallback for that role (or RuntimeError if no fallback). +**Credit Exhaustion:** HTTP 402 or keywords "credit"/"insufficient"/"balance"/"quota"/"key limit"/"limit exceeded" → `CreditExhaustionError` → LM Studio fallback for that role (or RuntimeError if no fallback). 
Fallback is resettable via `POST /api/openrouter/reset-exhaustion` or by re-setting the API key. **Boost Exhaustion:** Falls back to primary for that task; boost stays enabled; counter NOT decremented. @@ -170,7 +174,7 @@ Predictions refresh: after initialization, each task completion, mode switches, - Aggregator submitters: per-submitter pause (others continue); validator loop pauses entire validator - Prevents infinite retry loops (the 2000+ attempt bug) -**Account Exhaustion:** HTTP 402 on any `:free` model sets `_account_credits_exhausted` flag. All subsequent free model calls short-circuit immediately. Flag clears on next successful free model call. +**Account Exhaustion:** HTTP 402 on any `:free` model sets `_account_credits_exhausted` flag. All subsequent free model calls short-circuit immediately. Flag clears on next successful free model call, or via `POST /api/openrouter/reset-exhaustion`, or automatically when the API key is re-set. **Error Classes:** - `FreeModelExhaustedError` — all options exhausted, contains `soonest_retry` timestamp diff --git a/.cursor/rules/json-prompt-design.mdc b/.cursor/rules/json-prompt-design.mdc index a1a7e5b..bae4719 100644 --- a/.cursor/rules/json-prompt-design.mdc +++ b/.cursor/rules/json-prompt-design.mdc @@ -1,5 +1,5 @@ --- -alwaysApply: true +alwaysApply: false --- # Enhance AI Role Prompts with Complete Context Assembly @@ -217,12 +217,14 @@ WHEN IN DOUBT: Verify independently. Do not assume. 
Do not trust unverified inte - `backend/compiler/prompts/outline_prompts.py` - `backend/compiler/prompts/review_prompts.py` - `backend/compiler/prompts/rigor_prompts.py` +- `backend/compiler/prompts/critique_prompts.py` - `backend/compiler/validation/compiler_validator.py` - `backend/autonomous/prompts/topic_prompts.py` - `backend/autonomous/prompts/completion_prompts.py` - `backend/autonomous/prompts/paper_reference_prompts.py` - `backend/autonomous/prompts/paper_title_prompts.py` - `backend/autonomous/prompts/paper_redundancy_prompts.py` +- `backend/autonomous/prompts/paper_continuation_prompts.py` - `backend/autonomous/prompts/final_answer_prompts.py` **Note:** The prompt structure examples in the sections below show the core task-specific content. The INTERNAL CONTENT WARNING block is ALWAYS inserted between the role description and the "YOUR TASK:" section in the actual code. @@ -1347,6 +1349,28 @@ The coordinator tracks the current phase via `autonomous_section_phase`: **Autonomous Mode Phase Synchronization:** When the compiler runs in autonomous mode (Part 3), the autonomous coordinator polls `autonomous_section_phase` every 3 seconds and syncs it to the workflow state (`paper_phase` field). This ensures accurate crash recovery and prevents the bug where workflow state showed "outline" even though the compiler had progressed through body/conclusion/introduction phases. +### Retroactive Brainstorm Operation (Optional, Autonomous Mode Only) + +During autonomous paper compilation, the construction JSON includes an optional `brainstorm_operation` field: + +```json +{ + ... (standard construction fields) ..., + "brainstorm_operation": { + "action": "edit | delete | add", + "submission_number": 5, + "new_content": "corrected or new content (empty for delete)", + "reasoning": "Independent justification" + } +} +``` + +**Validation**: Brainstorm operations are validated by the compiler validator with brainstorm-only context. 
The validator never sees the paper operation when validating a brainstorm operation. + +**Independent Validity Principle**: Each operation must be justified on its own merits. Paper content must not depend on a brainstorm correction. Brainstorm corrections must not depend on paper content. + +**Models**: `BrainstormRetroactiveOperation` in `models.py`. Parsed in `high_context_submitter.py`. Handled in `compiler_coordinator._handle_brainstorm_retroactive_operation()`. Validated in `compiler_validator.validate_brainstorm_operation()`. + --- ## 5. COMPILER-Submitter OUTLINE CREATION (PHASE 1: ITERATIVE REFINEMENT) @@ -2064,7 +2088,7 @@ The rigor submitter uses a **2-step planning-then-execution process**: - Submitters provide exact text (`old_string`) to identify edit locations - The `old_string` is pre-validated to exist verbatim (exactly) in the document - The `old_string` is pre-validated to be unique (appear only once) -- If exact match fails, system tries in order: Unicode normalization → whitespace normalization → backslash normalization (collapses `\\\\cmd` → `\\cmd`, handles over-escaping) → consecutive fuzzy matching (conservative last resort) +- If exact match fails, system tries in order: Unicode normalization → whitespace normalization (multi-spaces) → all-whitespace normalization (collapses newlines/spaces/tabs to single space) → backslash normalization (collapses `\\\\cmd` → `\\cmd`, handles over-escaping) → consecutive fuzzy matching (conservative last resort) - If match fails or is ambiguous after all attempts, pre-validation rejects immediately with clear feedback (before LLM validation) - LLM validation then focuses on placement context and semantic appropriateness - Industry-standard approach used by Cursor, Claude Code, and similar tools @@ -2094,7 +2118,7 @@ All compiler modes (construction, review, rigor, outline_update) use this schema ### Validation Rules -- **REJECT** if `old_string` is not found after trying: exact match → Unicode 
normalization → whitespace normalization → backslash normalization → consecutive fuzzy matching +- **REJECT** if `old_string` is not found after trying: exact match → Unicode normalization → whitespace normalization → all-whitespace normalization → backslash normalization → consecutive fuzzy matching - **REJECT** if `old_string` matches multiple locations (not unique) - **REJECT** if `operation` doesn't match the intent (e.g., using "replace" for new content) - Validator confirms content flows naturally at the edit location @@ -2199,6 +2223,18 @@ Part 3 introduces autonomous topic selection, brainstorm-to-paper workflows, and --- +### 0. TOPIC EXPLORATION (Pre-Selection Candidate Brainstorm) + +**File:** `backend/autonomous/prompts/topic_exploration_prompts.py` + +**Purpose:** Before topic selection, collect 5 validated candidate brainstorm questions using the full Part 1 aggregator infrastructure (parallel submitters, batch validation up to 3). Uses `build_exploration_user_prompt()` to frame the standard aggregator as a candidate question generator. + +**Architecture:** Reuses `AggregatorCoordinator` — no custom JSON schemas. Standard aggregator submitter/validator prompts handle generation and validation. The exploration user prompt provides the framing context (research goal, existing brainstorms/papers, diversity requirement). + +**Standard Aggregator JSON Schemas Apply** (from Part 1 submitter/validator prompts). + +--- + ### 1. TOPIC SELECTION SUBMITTER **File:** `backend/autonomous/prompts/topic_prompts.py` @@ -2499,6 +2535,32 @@ No Removal: ### PART 3 PROMPT ASSEMBLY PATTERNS +### 9. 
BRAINSTORM CONTINUATION DECISION + +**File:** `backend/autonomous/prompts/paper_continuation_prompts.py` + +**Function:** `get_continuation_decision_json_schema()` + +``` +REQUIRED JSON FORMAT: +{ + "decision": "write_another_paper | move_on", + "reasoning": "string - Detailed explanation of assessment" +} + +FIELD REQUIREMENTS: +- decision: MUST be either "write_another_paper" or "move_on" +- reasoning: ALWAYS required +``` + +**Context**: User prompt + brainstorm topic + brainstorm DB + prior papers from this brainstorm (title/abstract/outline) + paper count ("N of 3 maximum"). Does NOT include cross-topic reference papers. + +**Validation**: Topic validator validates with `build_continuation_validation_prompt()` via `override_prompt` parameter. + +--- + +### PART 3 PROMPT ASSEMBLY PATTERNS (continued) + All Part 3 prompts follow similar assembly patterns to Part 1 and Part 2: ```python @@ -2517,7 +2579,12 @@ All Part 3 prompts follow similar assembly patterns to Part 1 and Part 2: **Context Variations by Role:** +**Topic Exploration (uses Part 1 Aggregator):** +- Aggregator user prompt = `build_exploration_user_prompt()` containing research goal, existing brainstorms, completed papers, diversity framing +- Standard aggregator submitter/validator prompts + context handling (shared training DB, rejection logs, RAG cycling) + **Topic Selection Submitter:** +- **5 validated candidate brainstorm questions** from topic exploration (direct injection) - All brainstorm topics with metadata - All completed papers with title + abstract + word count - Topic selection rejection history (last 5) diff --git a/.cursor/rules/latex-renderer.mdc b/.cursor/rules/latex-renderer.mdc index 650cc92..6fc03ba 100644 --- a/.cursor/rules/latex-renderer.mdc +++ b/.cursor/rules/latex-renderer.mdc @@ -1,7 +1,6 @@ --- -alwaysApply: true +alwaysApply: false --- - # LaTeX Renderer System ## 🔒 CRITICAL SECURITY REQUIREMENTS @@ -66,7 +65,7 @@ Dual rendering: **Rendered LaTeX View** (KaTeX math, 
dark theme on screen, white @@ -121,26 +120,43 @@ Must execute in this exact order in `renderLatexToHtml()`: ## Component Integration -| Component | Location | PDF | Toggle | Notes | -|-----------|----------|-----|--------|-------| -| LivePaper.jsx | compiler/ | ✅ | ✅ | Real-time paper viewing; auto-switches to raw >50K chars | -| PaperLibrary.jsx | autonomous/ | ✅ | ✅ | Paper library cards | -| FinalAnswerView.jsx | autonomous/ | ✅ | ✅ | Tier 3 final answer (defaults to raw for performance) | -| FinalAnswerLibrary.jsx | autonomous/ | ✅ | ✅ | Final answer library (all sessions) | -| LivePaperProgress.jsx | autonomous/ | ✅ | ✅ | Live Tier 2 paper in progress | -| LiveTier3Progress.jsx | autonomous/ | ✅ | ✅ | Live Tier 3 paper in progress | -| LiveResults.jsx | aggregator/ | ❌ | ✅ | Aggregator submissions (defaults to raw) | -| BrainstormList.jsx | autonomous/ | ❌ | ✅ | Brainstorm content viewer | +| Component | Location | PDF | Toggle | Disclaimer | Notes | +|-----------|----------|-----|--------|------------|-------| +| LivePaper.jsx | compiler/ | ✅ | ✅ | paper | Real-time paper viewing; auto-switches to raw >50K chars | +| PaperLibrary.jsx | autonomous/ | ✅ | ✅ | baked-in | Paper library cards (backend embeds disclaimer at save) | +| FinalAnswerView.jsx | autonomous/ | ✅ | ✅ | baked-in | Tier 3 final answer (defaults to raw for performance) | +| FinalAnswerLibrary.jsx | autonomous/ | ✅ | ✅ | paper | Final answer library (all sessions) | +| LivePaperProgress.jsx | autonomous/ | ✅ | ✅ | paper | Live Tier 2 paper in progress | +| LiveTier3Progress.jsx | autonomous/ | ✅ | ✅ | paper | Live Tier 3 paper in progress | +| LiveResults.jsx | aggregator/ | ❌ | ✅ | brainstorm | Aggregator submissions (defaults to raw) | +| BrainstormList.jsx | autonomous/ | ❌ | ✅ | brainstorm | Brainstorm content viewer | ### PDF Download Usage ```javascript // Pass raw text content — backend handles rendering and PDF generation -await downloadPDFViaBackend(rawContent, metadata, 
sanitizeFilename(title), outline); +// disclaimerType ('paper'|'brainstorm'|null) auto-prepends disclaimer if content lacks one +await downloadPDFViaBackend(rawContent, metadata, sanitizeFilename(title), outline, onStart, onComplete, onError, 'paper'); ``` --- +## Disclaimer Injection (`frontend/src/utils/disclaimerHelper.js`) + +**Purpose:** Hallucination/AI-generated-content disclaimers are shown on every brainstorm and paper view and included in every download — but NEVER injected into the model's context window. + +**Approach:** Frontend-only. `prependDisclaimer(content, type)` prepends a disclaimer block unless one already exists (detects both the backend-embedded `AUTONOMOUS AI SOLUTION` header on completed papers and the frontend `DISCLAIMER` header). + +**Two variants:** `PAPER_DISCLAIMER` (for papers) and `BRAINSTORM_DISCLAIMER` (for brainstorm/aggregator databases). + +**Completed papers** (`PaperLibrary`, `FinalAnswerView`) already carry a richer backend-embedded disclaimer with model attribution; the `hasDisclaimer()` check prevents double-prepending. + +**Download helpers** (`downloadRawText`, `downloadPDFViaBackend`) accept an optional `disclaimerType` param that triggers the same `prependDisclaimer` logic before writing. + +**Critical invariant:** Backend brainstorm files and in-progress paper files remain disclaimer-free so models never waste context tokens on disclaimer text. + +--- + ## Paper Critique Modal (`PaperCritiqueModal.jsx`) Ratings: Novelty, Correctness, Impact (1-10 scale). Up to 10 history entries. Regeneration with custom prompt. @@ -167,3 +183,4 @@ Ratings: Novelty, Correctness, Impact (1-10 scale). Up to 10 history entries. Re 12. IntersectionObserver root margin MUST be ≥600px — prevents visible pop-in 13. Debounce delay applies ONLY to rendered mode — raw mode updates instantly 14. Chunk `key` MUST include content hash (`simpleHash`) — prevents React reusing stale DOM on content change +15. 
Disclaimer MUST appear on all brainstorm/paper display and download paths — injected at frontend layer only, never stored in backend files consumed by models diff --git a/.cursor/rules/part-1-aggregator-tool-design-specifications.mdc b/.cursor/rules/part-1-aggregator-tool-design-specifications.mdc index 098f082..e89f88e 100644 --- a/.cursor/rules/part-1-aggregator-tool-design-specifications.mdc +++ b/.cursor/rules/part-1-aggregator-tool-design-specifications.mdc @@ -56,7 +56,7 @@ No context carryover between prompts (only system-intended DB/submission transfe **RAG Offload Priority — Submitter:** Shared Training DB → Local Submitter DB → Rejection Log → User Upload Files -**RAG Offload Priority — Validator:** Shared Training DB → User Upload Files → Submission under review (last resort) +**RAG Offload Priority — Validator:** Shared Training DB → User Upload Files (submission under review is always direct injected) ## Role Selection @@ -85,7 +85,9 @@ Every 7th acceptance (`total_acceptances % 7 == 0`, minimum 7 before first revie **Phase 1**: Validator reviews ALL accepted submissions, identifies AT MOST ONE for removal (redundant, contradicted, superseded, or provides no unique value). -**Phase 2** (only if removal proposed): Validator self-validates its removal proposal. Conservative default: if uncertain, reject removal. If validated: execute removal + trigger RAG rechunk. +**Phase 2** (only if removal proposed): Validator self-validates its removal proposal. Conservative default: if uncertain, reject removal. If validated: execute removal + full RAG rebuild (all shared-training sources are dropped and re-indexed from the post-removal file so deleted content is no longer retrievable). + +**Cleanup toggle**: `enable_cleanup_review` (default True) can be set False at `Coordinator.initialize()` to disable cleanup for short-lived mini-brainstorm phases (topic exploration, title exploration). 
**Critical selection rule**: When multiple submissions redundant, remove the WEAKEST (least unique value). Never remove a more complete submission. diff --git a/.cursor/rules/part-1-and-part-2-cointeraction-architecture.mdc b/.cursor/rules/part-1-and-part-2-cointeraction-architecture.mdc index c0eb38c..04a0207 100644 --- a/.cursor/rules/part-1-and-part-2-cointeraction-architecture.mdc +++ b/.cursor/rules/part-1-and-part-2-cointeraction-architecture.mdc @@ -70,6 +70,7 @@ The live-constructing compiler-written paper should be viewable in one tab and a - **Compiler defaults**: validator=25000, high-context=25000 (for outline_create/outline_update/construction/review), high-param=25000 (for rigor mode) - **GUI tooltips** provide recommended minimums: Submitter "25000+ for reasoning models", Validator "25000+", High-context "25000+ (outline needs 15K+)", High-param "25000+" - Settings are passed through API routes and stored in `rag_config` (aggregator) and `system_config` (compiler) +- **CRITICAL**: `system_config.compiler_*` settings (context windows, max output tokens) are the single source of truth for all compiler modules. ANY code that creates a `CompilerCoordinator` MUST write its context/token settings to `system_config` BEFORE init. The manual `/api/compiler/start` route does this; autonomous mode (`autonomous_coordinator.py`) must do it explicitly before each `CompilerCoordinator()` creation. - Reasoning models (e.g., those with `` tags or separate `reasoning` fields) may use significant tokens on internal reasoning before generating JSON output ## Other Notes diff --git a/.cursor/rules/part-2-compiler-tool-design-specification.mdc b/.cursor/rules/part-2-compiler-tool-design-specification.mdc index a7ff80e..4559443 100644 --- a/.cursor/rules/part-2-compiler-tool-design-specification.mdc +++ b/.cursor/rules/part-2-compiler-tool-design-specification.mdc @@ -167,6 +167,7 @@ Submission JSON: `operation`, `old_string` (exact, pre-validated), `new_string`. 1. 
Exact match 2. Unicode hyphen normalization (en-dash, em-dash variants) 3. Whitespace normalization (2+ spaces → single space) +3b. All-whitespace normalization (collapses newlines/spaces/tabs → single space) 4. Backslash normalization (`\\mathbb` → `\mathbb`) 5. Consecutive fuzzy matching: 85% consecutive chars + last 5% exact tail anchor + unique (≥20 char minimum) @@ -196,16 +197,19 @@ Prevents models' fake placeholder text (e.g., "XI. Conclusion\n*placeholder*") f Per-role context windows (all user-configurable, default 131072): - Validator, High-Context Submitter, High-Parameter Submitter: 131072 tokens each +- **Settings flow**: All compiler modules read from `system_config.compiler_*` at runtime. The caller that creates `CompilerCoordinator` MUST write settings to `system_config` before init (manual mode: `/api/compiler/start`; autonomous mode: `autonomous_coordinator.py` before `CompilerCoordinator()` creation). - Rigor mode dynamically adjusts RAG budget if outline + system prompts exceed available context +- Construction mode (autonomous) dynamically adjusts RAG budget when brainstorm content is present: `rag_budget = max(5000, max_allowed - outline_tokens - paper_tokens - brainstorm_tokens - 5000_overhead)`. Brainstorm always direct-injected at full fidelity; RAG evidence scales to fit remaining budget. **Context rules:** User prompt ALWAYS direct injected. Direct injection first; RAG only when doesn't fit. ~85% RAG retrieval, ~15% direct injections. Halt with error if user prompt exceeds context_window - minimum_RAG_allocation. 
**Prompt Size Validation** (all submitters before LLM call): -- `outline_create`, `outline_update`, `rigor`: raises ValueError if exceeds -- `construction`, `review`: returns None if exceeds +- `outline_create`, `outline_update`, `rigor`, `construction`, `review`: raises ValueError if exceeds - `validator`: rejects submission if exceeds -**Rigor Mode context**: no aggregator database; outline fully injected; paper content RAG-retrieved. +**Rigor Mode context**: no aggregator database; outline fully injected; paper content RAG-retrieved. RAG excludes `compiler_outline.txt` (already direct-injected). + +**RAG source exclusion (anti-duplication)**: All compiler RAG calls pass `exclude_sources` to skip chunks from content already direct-injected. Construction excludes outline + paper + brainstorm sources; outline_update excludes outline + paper; rigor excludes outline. See `rag-design-for-overall-program.mdc` for full table. --- @@ -215,6 +219,26 @@ Per-role context windows (all user-configurable, default 131072): --- +## Retroactive Brainstorm Correction (Autonomous Mode Only) + +During paper compilation in autonomous mode (Part 3), the compiler submitter sees both the paper AND the source brainstorm database as a unified editable workspace. The submitter may optionally propose a brainstorm operation alongside its paper operation each turn. + +**Brainstorm operations**: `edit` (correct submission content), `delete` (remove submission), `add` (new insight discovered during synthesis). + +**Independent Validation Principle**: Paper and brainstorm operations are validated SEPARATELY. The validator sees ONLY the paper when validating paper edits, ONLY the brainstorm when validating brainstorm operations. Each must stand on its own merits. Neither can depend on the other for correctness. + +**Acceptance is independent**: Paper accepted + brainstorm rejected = valid state. Brainstorm accepted + paper rejected = valid state. No combination produces incoherence. 
+ +**Brainstorm content**: Passed to construction prompts with full submission numbers. Submitter references entries by `#N` for edit/delete. + +**RAG refresh**: After accepted brainstorm modification, RAG is refreshed with updated brainstorm content so subsequent construction turns see corrected context. + +**Files**: `brainstorm_memory.py` (edit_submission, remove_submission, add_submission_retroactive), `compiler_validator.py` (validate_brainstorm_operation), `compiler_coordinator.py` (_handle_brainstorm_retroactive_operation), `construction_prompts.py` (brainstorm_operation JSON schema). + +**WebSocket events**: `brainstorm_retroactive_accepted`, `brainstorm_retroactive_rejected`. + +--- + ## Other Notes - JSON validation failure: reject submission, send reason to submitter's local failure feedback diff --git a/.cursor/rules/part-3-autonomous-research-mode.mdc b/.cursor/rules/part-3-autonomous-research-mode.mdc index c668b67..fb31b54 100644 --- a/.cursor/rules/part-3-autonomous-research-mode.mdc +++ b/.cursor/rules/part-3-autonomous-research-mode.mdc @@ -2,11 +2,11 @@ alwaysApply: true --- -# Part 3 - Autonomous Research Mode Design Specification +# Part 3 (Adding an Autonomous-Controlling Tier in Hierarchy Over Part 1 and 2) - Autonomous Research Mode Design Specification ## Overview -The Autonomous Research Mode is Part 3 of the MOTO Math Variant system. It is a self-directing two-tier research system that autonomously generates brainstorm topics, builds knowledge databases, and produces complete mathematical research papers based on a high-level research topic centered around the user prompt. +The Autonomous Research Mode is Part 3 of the MOTO Math Variant system. It is a self-directing three-tier research system that autonomously generates brainstorm topics, builds knowledge databases, produces complete mathematical research papers, and can synthesize a final answer based on a high-level research topic centered around the user prompt. 
**Example User Prompt**: "Solve the Langlands Bridge problem" or "Advance understanding of the Riemann Hypothesis" @@ -15,9 +15,10 @@ The Autonomous Research Mode is Part 3 of the MOTO Math Variant system. It is a - Part 2 (Compiler) requires user-directed paper compilation prompts - Part 3 (Autonomous Research) self-directs topic selection, brainstorming, and paper generation -**Two-Tier Architecture**: +**Three-Tier Architecture**: - **Tier 1**: Brainstorm aggregation databases (mathematical concept exploration) - **Tier 2**: Finished mathematical research papers (compiled from brainstorm databases) +- **Tier 3**: Final answer synthesis (short-form answer or long-form volume from Tier 2 papers) ## Design Philosophy @@ -29,7 +30,7 @@ The Autonomous Research Mode is Part 3 of the MOTO Math Variant system. It is a **Model Weight Exploration**: Completion review uses SPECIAL SELF-VALIDATION MODE because only the same model can assess whether its own weights have been exhausted for a given topic. -**Internal Knowledge Only**: The autonomous system operates solely on the model's pre-trained mathematical knowledge, RAG context from prior work, and user prompt. No external data access. +**External Verification Allowed**: The autonomous system may use the model's pre-trained mathematical knowledge, RAG context from prior work, user prompt, and external verification/search when the selected model/provider supports it. Internal AI-generated context remains non-authoritative and should be treated skeptically. --- @@ -64,6 +65,7 @@ The autonomous coordinator USES actual Part 2 compiler infrastructure for paper - Extracts abstract from completed paper for metadata storage **Critical Implementation Details**: +- **system_config propagation (REQUIRED)**: Before creating `CompilerCoordinator`, autonomous mode MUST write all compiler context/token settings to `system_config` (e.g., `system_config.compiler_high_context_context_window = self._high_context_context`). 
Compiler modules read from `system_config` at init — the manual `/api/compiler/start` route does this, but autonomous mode bypasses that route and must do it explicitly. Applies to both `_compile_paper_from_brainstorm()` and `_compile_tier3_paper()`. - Constrains section order: Body → Conclusion → Introduction → Abstract - Paper is considered complete when abstract is detected in paper content - Uses regex patterns to detect and extract abstract section @@ -75,14 +77,55 @@ The autonomous coordinator USES actual Part 2 compiler infrastructure for paper ## Workflow Overview **Tier 1 → Tier 2 → Tier 3 Loop:** -1. **Topic Selection** → Validator → Pre-Brainstorm Reference Selection (if papers exist) +0. **Topic Exploration** — Mini-aggregation: collect 5 validated candidate brainstorm questions (submit→validate→accumulate loop with rejection feedback). Broadens exploration landscape before committing to a direction. +1. **Topic Selection** (sees all 5 candidates + existing topics) → Validator → Pre-Brainstorm Reference Selection (if papers exist) 2. **Brainstorm Aggregation** (1-10 submitters, 1 validator, pruning every 7, with reference papers) 3. **Completion Review** every 10 acceptances (SPECIAL SELF-VALIDATION) → Continue or Write Paper -4. If Write Paper: **Additional Reference Selection** → **Paper Title Selection** → **Paper Compilation** (Body→Conclusion→Introduction→Abstract) +4. If Write Paper: **Additional Reference Selection** → **Paper Title Exploration** (5 candidates) → **Paper Title Selection** → **Paper Compilation** (Body→Conclusion→Introduction→Abstract) 5. **Paper Complete** → Log to Tier 2, cache brainstorm 6. **Paper Redundancy Review** every 3 papers -7. **Tier 3 Final Answer** every 5 papers (Certainty→Format→Short-form or Long-form volume) -8. Loop back to Topic Selection (or STOP if Tier 3 complete) +7. **Brainstorm Continuation Decision** (if papers < 3): write another paper or move on. 
If write another: new title → compilation with prior brainstorm papers as auto-refs → loop to step 5 +8. **Tier 3 Final Answer** every 5 papers (Certainty→Format→Short-form or Long-form volume) +9. Loop back to Topic Selection (or STOP if Tier 3 complete) + +--- + +## PHASE 0: Topic Exploration (Pre-Selection Candidate Brainstorm) + +### Purpose +Before committing to a brainstorm direction, the system runs a full aggregation using the Part 1 infrastructure that collects 5 validated candidate brainstorm questions. This broadens the exploration landscape using all configured submitters in parallel with batch validation. + +### Why This Exists (Top-p Exploration at Strategic Level) +Without exploration, the topic selector samples from the model's highest-probability region — the most obvious topic. By forcing 5 distinct, validated candidate directions first, the system maps the exploration landscape before committing: +- Breaks greedy single-sample selection +- Validator enforces diversity (rejects redundant candidates) +- Final selector sees the full landscape of options +- Uses full Part 1 aggregator infrastructure (parallel submitters, batch validation up to 3) + +### Architecture +- **Uses `AggregatorCoordinator`** from Part 1 — same parallel submitters + batch validator as normal brainstorms, but with **cleanup/pruning disabled** (`enable_cleanup_review=False`) since target is only 5 candidates +- **Prompts** (`backend/autonomous/prompts/topic_exploration_prompts.py`): `build_exploration_user_prompt()` frames the aggregation task for candidate question generation +- **Temp DB**: `exploration_candidates.txt` in brainstorms directory (cleaned up after phase) +- **Target**: 5 accepted candidates per exploration cycle +- **Safety valve**: 15 consecutive rejections → proceed with whatever candidates collected + +### Workflow +1. Aggregator starts with all configured submitters running in parallel +2. 
Submitters generate candidate brainstorm questions as standard submissions +3. Validator batch-validates (up to 3 at a time) checking quality, relevance, and DIVERSITY +4. Accepted candidates accumulate in temp exploration database +5. Coordinator monitors aggregator stats, stops at 5 acceptances +6. Reads exploration DB, formats as candidate list for topic selector + +### WebSocket Events +Standard aggregator events (`submission_accepted`, `submission_rejected`) flow through during exploration. +Additionally: `topic_exploration_started`, `topic_exploration_progress`, `topic_exploration_complete` + +### Crash Recovery +On resume, exploration restarts fresh (short phase, no state to preserve). + +### Every Brainstorm Starts This Way +Topic exploration runs before EVERY new topic selection cycle — no exceptions. --- @@ -96,6 +139,7 @@ The autonomous topic submitter decides what to work on next. It can: ### Topic Submitter Context The submitter receives: +- **5 validated candidate brainstorm questions** from Topic Exploration phase (direct injected) - User's high-level research prompt (PRIMARY context, always direct injected) - List of all existing brainstorm topics with metadata: - Topic ID @@ -261,16 +305,16 @@ The autonomous brainstorm aggregator inherits batch validation from Part 1 infra - **Validator processes up to 3 submissions at once**: Uses batch-specific prompts for 1, 2, or 3 submissions - **Independent assessment of each submission's value**: Each submission evaluated against existing database independently - **Intra-batch redundancy prevention**: If multiple submissions would be accepted but are redundant with each other, only the strongest is accepted -- **Queue overflow handling**: If 10+ submissions queued, takes latest 3 and clears rest (older submissions were generated against outdated database state) +- **Queue overflow handling**: If 10+ submissions queued, submitters are paused by the coordinator until queue drops below threshold - **Accelerated 
brainstorm exploration**: Batch validation increases throughput while maintaining quality through redundancy checks ### Key Differences from Part 1 Aggregator 1. **Topic-Specific Database**: Writes to `data/auto_brainstorms/brainstorm_{topic_id}.txt` instead of `rag_shared_training.txt` 2. **No User-Provided Topic Prompt**: Uses the AI-generated brainstorm topic prompt 3. **Completion Tracking**: Tracks acceptance count (including removals) for completion review trigger -4. **Hard Limit**: 80 accepted submissions (FORCE transition to paper writing, no completion review) +4. **Hard Limit**: 30 accepted submissions (FORCE transition to paper writing, no completion review) - Purpose: Prevents runaway brainstorms from accumulating indefinitely - - Trigger: After each acceptance, check if count >= 80 + - Trigger: After each acceptance, check if count >= 30 - Behavior: Immediately transition to paper writing, skip completion review - WebSocket event: `brainstorm_hard_limit_reached` 5. **Rejection Hard Limit**: 10 consecutive rejections (with minimum 5 acceptances) FORCE transition to paper writing @@ -300,12 +344,12 @@ The autonomous brainstorm aggregator inherits batch validation from Part 1 infra ### Regular Trigger Runs every 10 accepted submissions (includes both new acceptances AND pruning removals), AFTER the pruner has had its chance to run. -**Hard Limit Override**: If brainstorm reaches 80 accepted submissions, completion review is SKIPPED and paper writing is forced. +**Hard Limit Override**: If brainstorm reaches 30 accepted submissions, completion review is SKIPPED and paper writing is forced. **Example trigger points**: - Acceptances at 10, 20, 30, 40... 
trigger completion review - If prune removal happens at acceptance 9, the next acceptance (10th total) still triggers review -- At 80 acceptances: Hard limit triggers, completion review skipped, paper writing forced +- At 30 acceptances: Hard limit triggers, completion review skipped, paper writing forced ### Manual Paper Writing Trigger (User Override) @@ -348,6 +392,7 @@ Runs every 10 accepted submissions (includes both new acceptances AND pruning re - Does NOT require self-validation (user decision is final) - Brainstorm is marked complete regardless of acceptance count - Subsequent paper compilation proceeds normally with all selected reference papers +- **Race condition guard**: `_brainstorm_aggregation_loop()` checks `_manual_paper_writing_triggered` before calling `start()` on the aggregator (catches override during async init). The monitoring loop also stops the aggregator before returning on manual override. ### Purpose Assess whether the current brainstorm has been sufficiently explored relative to THIS MODEL'S internal knowledge (weights) and decide whether to continue brainstorming or begin writing a paper. @@ -424,6 +469,30 @@ Same two-step browsing workflow as pre-brainstorm selection (expand request → **Final Reference List**: Already-selected papers + newly-selected papers (max 6 total) +### Paper Title Exploration (Pre-Title Candidate Brainstorm) + +**Purpose**: Before committing to a paper title, the system collects 5 validated candidate titles using the Part 1 aggregator infrastructure. The final title selection then chooses from candidates, synthesizes them, or proposes a new title with justification. + +**Architecture**: Uses `AggregatorCoordinator` from Part 1 — same parallel submitters + batch validator, but with **cleanup/pruning disabled** (`enable_cleanup_review=False`) since target is only 5 candidates. + +**Applies to EVERY paper creation**: Tier 2 papers (1/2/3 from brainstorm), Tier 3 short-form, Tier 3 gap/intro/conclusion chapters. 
+ +**Workflow**: +1. Aggregator starts with all configured submitters running in parallel +2. Submitters generate candidate paper titles as standard submissions +3. Validator checks quality, relevance, and DIVERSITY (rejects near-duplicates) +4. Accepted candidates accumulate in temp title DB +5. Coordinator stops at 5 acceptances (or 15 consecutive rejections safety valve) +6. Reads title DB, formats as candidate list for final title selection + +**Temp DB**: `title_candidates_{topic_id}.txt` in brainstorms dir (cleaned up after phase) + +**WebSocket Events**: `paper_title_exploration_started`, `paper_title_exploration_progress`, `paper_title_exploration_complete` + +**Crash Recovery**: On resume, exploration restarts fresh (short phase, no state to preserve). + +**Prompts**: `paper_title_exploration_prompts.py` — `build_title_exploration_user_prompt()` frames the aggregation task for candidate title generation with context: user prompt, topic, brainstorm summary, existing papers, reference papers. + ### Paper Title Selection **Context**: @@ -432,8 +501,9 @@ Same two-step browsing workflow as pre-brainstorm selection (expand request → - Selected reference papers (if any, RAG) - ALL existing paper titles from THIS brainstorm topic (direct injection) - ALL existing paper abstracts from THIS brainstorm topic (if any, direct injection) +- **5 validated candidate titles from Paper Title Exploration phase** (direct injection) -**Purpose**: Choose a title for the paper that will be compiled from this brainstorm. +**Purpose**: Choose a title for the paper that will be compiled from this brainstorm. The selector sees 5 pre-validated candidate titles and may select one, synthesize, or propose a new title with justification. JSON schema defined in `json-prompt-design.mdc`. Fields: `paper_title`, `reasoning`. @@ -462,6 +532,18 @@ JSON schema defined in `json-prompt-design.mdc`. 
Fields: `paper_title`, `reasoni Once paper title is selected and validated, paper compilation begins using the FULL Part 2 compiler workflow. +### Retroactive Brainstorm Correction (Unified Workspace) + +During paper compilation, the compiler submitter sees both the paper AND the source brainstorm database simultaneously. On each construction turn, the submitter may optionally propose a brainstorm edit/delete/add alongside its paper operation. + +**Key design**: Submitter sees full workspace (paper + brainstorm). Validator sees ONLY the specific operation being validated (paper OR brainstorm, never both). Each operation must be independently justified. + +**Operations**: edit (correct entry), delete (remove entry), add (new insight). Each validated independently by the compiler validator with brainstorm-only context. + +**Independent acceptance**: Paper and brainstorm results are independent. Paper accepted + brainstorm rejected = valid. RAG refreshed after accepted brainstorm modifications. + +**Not available in manual Part 2 mode** — only during autonomous paper compilation where `_current_topic_id` is set. 
+ ### Compilation Workflow **Uses existing Part 2 (Compiler) infrastructure**: @@ -523,11 +605,13 @@ The validator will REJECT any outline missing these required sections or with in **Skip Critique Phase (User Override)**: - **Purpose**: Allow users to manually skip the critique/rewrite phase and proceed directly to conclusion - **API Endpoint**: `POST /api/auto-research/skip-critique` -- **Availability**: Only during Tier 2 paper writing when actively in critique phase -- **Behavior**: Immediately ends critique phase, transitions to conclusion phase, broadcasts `critique_phase_skipped` with `reason: "user_override"` -- **Cannot be undone**: Once executed, critique phase for current paper version is permanently skipped -- **Frontend**: "Skip Critique & Continue" button appears in paper status banner during critique phase (requires confirmation) -- **Error Conditions**: 400 if not running, 400 if not in Tier 2 paper writing, 400 if not in critique phase +- **Availability**: Any time during Tier 2 paper writing +- **Behavior**: + - If already in critique phase: immediately ends critique and transitions to conclusion + - If critique phase has not started yet: queues a pre-emptive skip and auto-skips when critique is reached +- **Cannot be undone**: Once executed or queued, critique for the current paper version is bypassed +- **Frontend**: The paper status banner supports both immediate skip and pre-emptive queued skip +- **Error Conditions**: 400 if not running, 400 if not in Tier 2 paper writing **Outline Updates**: Outline can be updated at any time during the cycle (same as Part 2) @@ -686,9 +770,24 @@ JSON schema defined in `json-prompt-design.mdc`. 
Fields: `should_remove` (bool), - Update metadata to mark as "archived" - Update statistics -### Return to Topic Selection +### Return to Topic Selection / Brainstorm Multi-Paper Continuation + +After paper completion and redundancy review, the system enters a **continuation decision loop** (max 3 papers per brainstorm): + +1. If `papers_from_brainstorm < 3`: Run continuation decision (submitter + topic validator) + - **write_another_paper**: New title selection + compilation (skip reference re-selection, auto-inject prior brainstorm papers) + - **move_on**: Proceed to Tier 3 check, then Topic Selection +2. If 3 papers reached (hard limit): Skip decision, proceed to Tier 3 check + +**Continuation Decision Context**: User prompt + brainstorm topic + brainstorm DB + all prior papers (title/abstract/outline). Does NOT include cross-topic reference papers. -After paper completion (and redundancy review if triggered), the workflow checks if **Tier 3 Final Answer** should trigger. +**Prior Brainstorm Papers as References**: For paper 2/3, all prior papers from the same brainstorm are auto-loaded into compiler RAG as `is_user_file=True` (high priority). These are separate from the 6-paper cross-topic reference limit. + +**Reference Selection**: Runs ONCE per brainstorm cycle. Papers 2/3 reuse the same cross-topic references. + +**WebSocket Events**: `brainstorm_continuation_started`, `brainstorm_continuation_decided`, `brainstorm_paper_limit_reached` + +**Crash Recovery**: `brainstorm_paper_count` and `current_brainstorm_paper_ids` persisted in workflow state. --- @@ -846,11 +945,15 @@ The system implements **two tiers of model tracking**: ================================================================================ AUTONOMOUS AI SOLUTION -Disclaimer: This content is for informational purposes only. This paper was -autonomously generated with the novelty-seeking MOTO harness without peer review -or user oversight beyond the original prompt. 
AI-generated content may contain -fabricated or unverified claims presented with high confidence. All content -should be viewed with extreme scrutiny and independently verified before use. +Disclaimer: This content is provided for informational and experimental purposes +only. This paper was autonomously generated with the novelty-seeking MOTO +harness without peer review or user oversight beyond the original prompt. It +may contain incorrect, incomplete, misleading, or fabricated claims presented +with high confidence. Use of this content is at your own risk. You are solely +responsible for reviewing and independently verifying any output before relying +on it, and the developers, operators, and contributors are not responsible for +errors, omissions, decisions made from this content, or any resulting loss, +damage, cost, or liability. User's Research Prompt: [user's original prompt here] @@ -939,11 +1042,15 @@ Tier 3 tracks all models used during final answer generation for author attribut ================================================================================ AUTONOMOUS AI SOLUTION -Disclaimer: This content is for informational purposes only. This paper was -autonomously generated with the novelty-seeking MOTO harness without peer review -or user oversight beyond the original prompt. AI-generated content may contain -fabricated or unverified claims presented with high confidence. All content -should be viewed with extreme scrutiny and independently verified before use. +Disclaimer: This content is provided for informational and experimental purposes +only. This paper was autonomously generated with the novelty-seeking MOTO +harness without peer review or user oversight beyond the original prompt. It +may contain incorrect, incomplete, misleading, or fabricated claims presented +with high confidence. Use of this content is at your own risk. 
You are solely +responsible for reviewing and independently verifying any output before relying +on it, and the developers, operators, and contributors are not responsible for +errors, omissions, decisions made from this content, or any resulting loss, +damage, cost, or liability. User's Research Prompt: [user's original prompt here] @@ -1053,7 +1160,7 @@ Returns: is_long_form, volume_title, outline_complete, current/total/completed c #### FinalAnswerView.jsx Main component for displaying Tier 3 status and content: -- Status badge: "FINAL ANSWER IN PROGRESS" (yellow) or "FINAL ANSWER" (green) +- Status badge: "FINAL ANSWER IN PROGRESS" uses the active Tier 3 accent state; completion uses the green success state - Certainty assessment display - Format selection display - Volume organization with chapter status (long form) @@ -1273,7 +1380,7 @@ Main interface component: Brainstorm management component: - List of all brainstorm topics with status indicators - Expandable to show brainstorm database content -- Status badges: In Progress (yellow), Complete (green) +- Status badges: In Progress uses the active accent state; Complete uses the green success state - Submission counts per brainstorm - Papers generated from each brainstorm - **Delete button**: Removes brainstorm and all associated files @@ -1367,7 +1474,7 @@ Real-time Tier 3 final answer display component (embedded in AutonomousResearchI ### FinalAnswerView.jsx Tier 3 Final Answer display component (separate tab for completed/overall final answer status): -- Status badge: "FINAL ANSWER IN PROGRESS" (yellow) or "FINAL ANSWER ✓" (green) +- Status badge: "FINAL ANSWER IN PROGRESS" uses the active Tier 3 accent state; "FINAL ANSWER ✓" uses the green success state - Certainty assessment summary display - Format selection indicator (Short Form / Long Form) - Volume organization outline with chapter status (for long form): @@ -1382,7 +1489,7 @@ Tier 3 Final Answer display component (separate tab for completed/overall 
final **Tab Styling**: - Tab appears in "Final Answer" section of navigation -- Yellow highlight when Tier 3 is active (in progress) +- Active Tier 3 tab uses the in-progress highlight state with pulse animation - Green highlight with checkmark when complete @@ -1413,16 +1520,17 @@ Tier 3 Final Answer display component (separate tab for completed/overall final - Brainstorm database as primary source ### Running Modes -- **Part 1, Part 2, Part 3 can run INDEPENDENTLY** (user chooses which mode) -- **Part 1 and Part 2 can run SIMULTANEOUSLY** (as originally designed) -- **Part 3 runs ALONE** (it internally uses Part 1 and Part 2 components but manages them autonomously) -- User cannot manually start Part 1 or Part 2 while Part 3 is running (Part 3 controls them) +- **Part 1, Part 2, and Part 3 remain user-selectable modes** +- **Only ONE workflow mode may be active at a time** — Aggregator, Compiler, and Autonomous Research are now mutually exclusive at runtime +- **Part 3 internally controls Part 1 and Part 2 components** during autonomous execution +- Starting any mode while another mode is running must be blocked until the active mode is stopped --- ## Prerequisites -- At least one LM Studio model must be available for each role +- Either an OpenRouter API key or at least one LM Studio model must be available to begin +- LM Studio is highly recommended even with OpenRouter enabled because local embeddings/RAG are free and faster - User must provide high-level research prompt - No dependency on prior Part 1 or Part 2 usage - Fresh start with empty brainstorm/paper libraries @@ -1432,8 +1540,8 @@ Tier 3 Final Answer display component (separate tab for completed/overall final ## Error Handling ### JSON Parse Failure (Topic Selection) -- Retry up to 3 times with same context -- If persistent, log error and retry with refreshed context (include prior failure in context) +- Retry indefinitely with rejection feedback (same as other agents) +- Each retry includes prior 
failure context ### JSON Parse Failure (Brainstorm Aggregation) - Same as Part 1 Aggregator: reject submission, feedback to submitter @@ -1470,10 +1578,16 @@ Tier 3 Final Answer display component (separate tab for completed/overall final - High-context submitter max tokens: 25000 - High-parameter submitter max tokens: 25000 - Completion review interval: 10 acceptances (includes removals) -- Topic selection retry limit: 3 - Max brainstorms in parallel: 1 (sequential brainstorm → paper cycle) - Max reference papers for context: 6 +### Token Tracking & Research Timer +- `token_tracker` singleton resets and starts timer on `autonomous_coordinator.start()`, stops on stop/finally +- Cumulative input/output tokens tracked per model from every successful LLM completion call (6 code paths in `api_client_manager`) +- `token_usage_updated` WebSocket event broadcast after each tracked call; `GET /api/token-stats` for initial fetch +- Displayed in WorkflowPanel sidebar (timer, totals, expandable per-model breakdown) +- Also activated for standalone aggregator/compiler via API route start/stop + --- ## Critical Invariants @@ -1500,11 +1614,23 @@ Tier 3 Final Answer display component (separate tab for completed/overall final 20. **Model tracking is ENABLED during Tier 3** - All API calls tracked for author attribution and model credits 21. **Same model = single author** - Model used in multiple instances counts as ONE author entry, but all API calls tallied 22. **Paper redundancy is DISABLED during Tier 3** - `_tier3_active` flag prevents redundancy checks from purging papers being used in the final volume -23. **Brainstorm hard limit is 80 acceptances** - After 80 acceptances, paper writing is forced (no completion review) +23. **Brainstorm hard limit is 30 acceptances** - After 30 acceptances, paper writing is forced (no completion review) 24. 
**Maximum 1 completed rewrite per paper** - Rewrite counts as "completed" only after first successful body acceptance; prevents infinite loops from failed rewrite attempts 25. **Partial revision option available** - Allows targeted edits without full body rewrite 26. **Total rewrite is last resort** - Only for catastrophic issues that can't be fixed with targeted edits 27. **Rejection hard limit is 10 consecutive rejections (with 5+ acceptances)** - Prevents infinite rejection loops +28. **Retroactive brainstorm corrections during Tier 2 paper compilation** - Submitter sees unified paper+brainstorm workspace; operations validated independently by validator (paper-only context for paper ops, brainstorm-only context for brainstorm ops); each operation must stand alone without requiring the other for correctness +29. **Max 3 papers per brainstorm** - hard limit, continuation decision skipped after 3rd paper +30. **Prior brainstorm papers ALWAYS auto-included** for paper 2/3 as `is_user_file=True` in RAG, separate from 6-paper cross-topic reference limit +31. **Reference selection runs ONCE per brainstorm cycle** - papers 2/3 reuse same cross-topic references +32. **Topic validator validates continuation decisions** - not self-validation (strategic decision, not weight assessment) +33. **Tier 3 checks after brainstorm cycle completes** (move_on or hard limit), not between papers +34. **No brainstorm re-opening during continuation** - strictly write_another_paper or move_on +35. **Topic exploration runs before EVERY topic selection** — Uses full Part 1 aggregator with all submitters in parallel and batch validation to collect 5 candidate questions. No exceptions. +36. **Topic exploration uses standard aggregator (cleanup disabled)** — Same parallel submitters, batch validation (up to 3), queue management as normal brainstorms. Cleanup/pruning is disabled because the phase is capped at 5 candidates and the temp DB is deleted afterwards. +37. 
**Paper title exploration runs before EVERY title selection** — Uses full Part 1 aggregator to collect 5 candidate titles before every paper creation (Tier 2 papers 1/2/3, Tier 3 short-form, Tier 3 gap/intro/conclusion chapters). No exceptions. +38. **Title exploration uses standard aggregator (cleanup disabled)** — Same parallel submitters, batch validation, queue management. Cleanup/pruning is disabled because the phase is capped at 5 candidates and the temp DB is deleted afterwards. +39. **Final title selection sees candidate titles** — The final title selection (made after the 5 candidates are collected) can choose a candidate, synthesize several candidates, or propose a new title. A newly proposed title must justify its divergence from all candidates. --- @@ -1540,7 +1666,7 @@ Each role in autonomous research mode supports OpenRouter model selection with h **Fallback Behavior**: - If OpenRouter is selected and has a fallback configured: Automatically falls back to LM Studio on credit exhaustion - If no LM Studio available: OpenRouter-only operation (system works without LM Studio) -- Fallback is per-role and permanent for the session (no retries after fallback) +- Fallback is per-role and resettable via `POST /api/openrouter/reset-exhaustion` or by re-setting the API key ## Other Notes diff --git a/.cursor/rules/program-directory-and-file-definitions.mdc b/.cursor/rules/program-directory-and-file-definitions.mdc index e93a590..4dea653 100644 --- a/.cursor/rules/program-directory-and-file-definitions.mdc +++ b/.cursor/rules/program-directory-and-file-definitions.mdc @@ -7,6 +7,9 @@ LM Studio and its pre-loaded models can be reached at "http://127.0.0.1:1234".
## Complete Project Directory Structure and File Descriptions project-root/ +├── .github/ # GitHub community health files +│ ├── ISSUE_TEMPLATE/ # Public issue forms + security contact routing +│ └── pull_request_template.md # Default pull request template ├── backend/ │ ├── shared/ # SHARED RESOURCES │ │ ├── __init__.py # Package initialization @@ -20,6 +23,7 @@ project-root/ │ │ ├── workflow_predictor.py # Predicts next 20 API calls (mode-specific algorithms) │ │ ├── free_model_manager.py # Free model rotation/cooldown singleton (looping + auto-selector backup) │ │ ├── rag_lock.py # Global RAG operation lock (prevents Aggregator/Compiler collision) +│ │ ├── token_tracker.py # Cumulative input/output token tracker singleton with per-model breakdown and research timer │ │ ├── wolfram_alpha_client.py # Wolfram Alpha API client │ │ ├── utils.py # Common utilities │ │ ├── json_parser.py # JSON parsing with sanitization for LLM quirks @@ -66,7 +70,8 @@ project-root/ │ │ ├── agents/ │ │ │ ├── __init__.py # Package initialization │ │ │ ├── high_context_submitter.py # 3 modes: construction, outline, review -│ │ │ └── high_param_submitter.py # Rigor enhancement mode +│ │ │ ├── high_param_submitter.py # Rigor enhancement mode +│ │ │ └── critique_submitter.py # Critique phase submitter (peer review) │ │ ├── validation/ │ │ │ ├── __init__.py # Package initialization │ │ │ └── compiler_validator.py # Validates coherence, rigor, placement @@ -88,7 +93,7 @@ project-root/ │ │ ├── __init__.py # Package initialization │ │ ├── core/ │ │ │ ├── __init__.py # Package initialization -│ │ │ ├── autonomous_coordinator.py # Orchestrates two-tier workflow (brainstorm → paper) +│ │ │ ├── autonomous_coordinator.py # Orchestrates the Tier 1 → Tier 2 → Tier 3 autonomous workflow │ │ │ └── autonomous_rag_manager.py # Autonomous-specific RAG wrapper │ │ ├── agents/ │ │ │ ├── __init__.py # Package initialization @@ -108,17 +113,23 @@ project-root/ │ │ ├── prompts/ │ │ │ ├── __init__.py # Package 
initialization │ │ │ ├── topic_prompts.py # Topic selection & validation prompts +│ │ │ ├── topic_exploration_prompts.py # Builds aggregator user prompt for topic exploration phase │ │ │ ├── completion_prompts.py # Completion review & self-validation prompts │ │ │ ├── paper_reference_prompts.py # Reference selection prompts +│ │ │ ├── paper_title_exploration_prompts.py # Builds aggregator user prompt for paper title exploration phase │ │ │ ├── paper_title_prompts.py # Paper title selection prompts │ │ │ ├── paper_redundancy_prompts.py # Paper redundancy review prompts +│ │ │ ├── paper_continuation_prompts.py # Brainstorm multi-paper continuation decision prompts │ │ │ └── final_answer_prompts.py # TIER 3 - Final answer assessment/selection/volume prompts │ │ └── memory/ │ │ ├── __init__.py # Package initialization -│ │ ├── brainstorm_memory.py # Per-brainstorm database management +│ │ ├── brainstorm_memory.py # Per-brainstorm database management (includes retroactive edit/remove/add during paper compilation) │ │ ├── paper_library.py # Paper library management (Tier 2) │ │ ├── research_metadata.py # Research metadata (brainstorms + papers associations) │ │ ├── autonomous_rejection_logs.py # Topic selection & completion feedback logs +│ │ ├── topic_exploration_memory.py # In-memory candidate DB for topic exploration phase +│ │ ├── paper_model_tracker.py # Per-paper model usage tracking and author attribution +│ │ ├── autonomous_api_logger.py # Autonomous API call logger singleton │ │ ├── final_answer_memory.py # TIER 3 - Final answer state & volume management │ │ └── session_manager.py # Prompt-based session folder organization │ │ @@ -137,7 +148,7 @@ project-root/ │ │ ├── boost.py # Boost API endpoints (enable/disable/toggle/status) │ │ ├── workflow.py # Workflow API endpoints (predictions/history) │ │ ├── download.py # PDF generation endpoint via Playwright (POST /api/download/pdf) -│ │ ├── openrouter.py # OpenRouter API endpoints (global key, models, providers, LM 
Studio availability, **GET /api/model-cache** for model ID caching) +│ │ ├── openrouter.py # OpenRouter API endpoints (global key, models, providers, LM Studio availability, **GET /api/model-cache** for model ID caching, **POST /api/openrouter/reset-exhaustion** to reset credit exhaustion mid-session) │ │ └── websocket.py # WebSocket for real-time updates │ │ │ ├── data/ # Persistent data storage @@ -205,6 +216,7 @@ project-root/ │ │ │ │ │ │ │ └── autonomous/ # AUTONOMOUS RESEARCH │ │ │ ├── AutonomousResearchInterface.jsx # Main control: research prompt, start/stop, current tier +│ │ │ ├── AutonomousResearch.css # Autonomous research styles │ │ │ ├── BrainstormList.jsx # List all brainstorm topics with status │ │ │ ├── PaperLibrary.jsx # Grid view of completed papers (title + abstract) │ │ │ ├── AutonomousResearchSettings.jsx # Model configs for all roles @@ -213,16 +225,27 @@ project-root/ │ │ │ ├── LiveTier3Progress.jsx # Real-time Tier 3 final answer display (embedded in interface) │ │ │ ├── FinalAnswerView.jsx # TIER 3 - Final answer tab (separate tab for completed answers) │ │ │ ├── FinalAnswerLibrary.jsx # All sessions final answer library viewer -│ │ │ └── ArchiveViewerModal.jsx # Research lineage archive viewer (papers + brainstorms) +│ │ │ ├── FinalAnswerLibrary.css # Final answer library styles +│ │ │ ├── ArchiveViewerModal.jsx # Research lineage archive viewer (papers + brainstorms) +│ │ │ └── ArchiveViewerModal.css # Archive viewer styles │ │ │ +│ │ ├── StartupProviderSetupModal.jsx # Post-disclaimer startup chooser for OpenRouter vs LM Studio setup │ │ ├── OpenRouterApiKeyModal.jsx # Modal for global OpenRouter API key configuration │ │ ├── PaperCritiqueModal.jsx # Modal for displaying validator paper critiques (ratings, feedback, history) │ │ ├── CritiqueNotificationStack.jsx # Persistent popup notifications for high-scoring critiques (≥7.0 avg) +│ │ ├── CreditExhaustionNotificationStack.jsx # Persistent red notifications for OpenRouter credit 
exhaustion with "Retry OpenRouter" reset button +│ │ ├── HungConnectionNotificationStack.jsx # Persistent amber notifications for API calls exceeding 15 minutes (possible hung connections) │ │ ├── BoostLogs.jsx # Boost API call log viewer +│ │ ├── BoostLogs.css # Boost logs styles │ │ ├── BoostControlModal.jsx # Modal for boost configuration (next-X, category, per-task) +│ │ ├── BoostControlModal.css # Boost control modal styles │ │ ├── WorkflowPanel.jsx # Workflow prediction panel (next 20 API calls display) +│ │ ├── WorkflowPanel.css # Workflow panel styles │ │ ├── TextFileUploader.jsx # User file upload component +│ │ ├── TextFileUploader.css # File uploader styles │ │ ├── OpenRouterPrivacyWarningModal.jsx # Privacy policy error modal (OpenRouter data sharing) +│ │ ├── settings-common.css # Shared settings panel styles +│ │ ├── critique-modal.css # Paper critique modal styles │ │ │ │ │ ├── services/ │ │ │ ├── api.js # Backend API calls (includes openRouterAPI) @@ -230,9 +253,11 @@ project-root/ │ │ │ │ │ ├── utils/ │ │ │ ├── downloadHelpers.js # PDF/raw download helpers (Playwright backend PDF) -│ │ │ └── modelCache.js # Frontend model cache utilities (display_name → api_id lookup) +│ │ │ ├── modelCache.js # Frontend model cache utilities (display_name → api_id lookup) +│ │ │ ├── autonomousProfiles.js # Shared autonomous recommended-profile definitions and persistence helpers +│ │ │ └── disclaimerHelper.js # Frontend-only disclaimer injection for brainstorm/paper views │ │ │ -│ │ ├── App.jsx # Main app with tab navigation +│ │ ├── App.jsx # Main app shell with top-level mode switch (Autonomous ASI S.T.E.M. / Advanced Manual ASI S.T.E.M.) 
and tab navigation │ │ ├── index.css # Styles │ │ └── index.jsx # React entry point │ │ @@ -241,6 +266,7 @@ project-root/ │ ├── requirements.txt # Python dependencies ├── package.json # Root scripts +├── SECURITY.md # Security policy and private vulnerability reporting ├── Press to Launch MOTO.bat # The user's one-click program launcher. └── _moto_internal_launcher.ps1 # Internal PowerShell launcher (not for direct user use) @@ -259,6 +285,7 @@ project-root/ - `free_model_manager.py`: Free model rotation/cooldown singleton (looping, auto-selector `openrouter/free`, account exhaustion detection) - `wolfram_alpha_client.py`: Wolfram Alpha API client for rigor verification - `rag_lock.py`: Global RAG operation lock (prevents collision, retry logic for reads) +- `token_tracker.py`: Cumulative input/output token tracker singleton with per-model breakdown and research timer. Reset on session start, timer start/stop tied to coordinator lifecycle. Stats broadcast via `token_usage_updated` WebSocket event after each successful LLM call. 
- `utils.py`: Token counting, text compression, file I/O - `json_parser.py`: JSON parsing with sanitization for LLM responses; sanitizes reasoning tokens, markdown blocks, control tokens, LaTeX escapes, control characters; **rejects truncated JSON** (raises ValueError with diagnostics) to prevent corrupted content from passing validation - `critique_memory.py`: Paper critique persistence (ratings, feedback, history, session-aware) @@ -282,8 +309,8 @@ project-root/ - Agents: `topic_selector.py`, `topic_validator.py`, `completion_reviewer.py`, `reference_selector.py`, `paper_title_selector.py` - Tier 3 Agents: `certainty_assessor.py`, `answer_format_selector.py`, `volume_organizer.py` - `paper_redundancy_checker.py`: Library quality maintenance (every 3 papers) -- Prompts: `topic_prompts.py`, `completion_prompts.py`, `paper_reference_prompts.py`, `paper_title_prompts.py`, `paper_redundancy_prompts.py`, `final_answer_prompts.py` -- Memory: `brainstorm_memory.py`, `paper_library.py`, `research_metadata.py`, `session_manager.py`, `autonomous_rejection_logs.py`, `final_answer_memory.py` (model tracking, archival) +- Prompts: `topic_prompts.py`, `topic_exploration_prompts.py`, `completion_prompts.py`, `paper_reference_prompts.py`, `paper_title_exploration_prompts.py`, `paper_title_prompts.py`, `paper_redundancy_prompts.py`, `paper_continuation_prompts.py`, `final_answer_prompts.py` +- Memory: `brainstorm_memory.py`, `paper_library.py`, `research_metadata.py`, `session_manager.py`, `autonomous_rejection_logs.py`, `topic_exploration_memory.py` (in-memory candidate DB), `paper_model_tracker.py` (per-paper model usage tracking and author attribution), `autonomous_api_logger.py` (API call logging singleton), `final_answer_memory.py` (model tracking, archival) ### API Routes @@ -292,8 +319,9 @@ project-root/ ### Frontend Components +- `App.jsx`: Top-level GUI shell. 
Default mode is `Autonomous ASI S.T.E.M.` for Part 3 screens; `Advanced Manual ASI S.T.E.M.` contains the manual Part 1 Aggregator + Part 2 Compiler workspace. Shared utility controls (Boost, OpenRouter, WorkflowPanel) remain global. - **Aggregator**: `AggregatorInterface.jsx`, `AggregatorSettings.jsx`, `AggregatorLogs.jsx`, `LiveResults.jsx` - **Compiler**: `CompilerInterface.jsx`, `CompilerSettings.jsx`, `CompilerLogs.jsx`, `LivePaper.jsx` - **Autonomous**: `AutonomousResearchInterface.jsx`, `BrainstormList.jsx`, `PaperLibrary.jsx`, `AutonomousResearchSettings.jsx`, `AutonomousResearchLogs.jsx`, `LivePaperProgress.jsx`, `LiveTier3Progress.jsx`, `FinalAnswerView.jsx`, `FinalAnswerLibrary.jsx`, `ArchiveViewerModal.jsx` -- **Shared**: `OpenRouterApiKeyModal.jsx`, `PaperCritiqueModal.jsx`, `CritiqueNotificationStack.jsx`, `BoostLogs.jsx`, `BoostControlModal.jsx`, `WorkflowPanel.jsx`, `TextFileUploader.jsx`, `OpenRouterPrivacyWarningModal.jsx`, `LatexRenderer.jsx` (dual view, KaTeX, theorem parsing), `LatexRenderer.css` -- **Utils**: `downloadHelpers.js` (PDF/raw download), `modelCache.js` (display_name → api_id lookup), `api.js`, `websocket.js` +- **Shared**: `StartupProviderSetupModal.jsx`, `OpenRouterApiKeyModal.jsx`, `PaperCritiqueModal.jsx`, `CritiqueNotificationStack.jsx`, `CreditExhaustionNotificationStack.jsx`, `HungConnectionNotificationStack.jsx`, `BoostLogs.jsx`, `BoostControlModal.jsx`, `WorkflowPanel.jsx`, `TextFileUploader.jsx`, `OpenRouterPrivacyWarningModal.jsx`, `LatexRenderer.jsx` (dual view, KaTeX, theorem parsing), `LatexRenderer.css` +- **Utils**: `downloadHelpers.js` (PDF/raw download), `modelCache.js` (display_name → api_id lookup), `autonomousProfiles.js` (shared recommended-profile definitions + persistence helpers), `disclaimerHelper.js` (frontend-only disclaimer injection), `api.js`, `websocket.js` diff --git a/.cursor/rules/rag-design-for-overall-program.mdc b/.cursor/rules/rag-design-for-overall-program.mdc index f365deb..d3095b1 100644 
--- a/.cursor/rules/rag-design-for-overall-program.mdc +++ b/.cursor/rules/rag-design-for-overall-program.mdc @@ -1,5 +1,5 @@ --- -alwaysApply: true +alwaysApply: false --- ## Important Notes When Editing RAG Systems The RAG system in this program is very advanced, be certain that any changes you make to the RAG system are correct changes. @@ -12,7 +12,7 @@ If an item is direct injected, its RAG counterpart must NOT also be included. **RAG Offload Priority — Submitter:** Shared Training DB → Local Submitter DB → Rejection Log → User Upload Files -**RAG Offload Priority — Validator:** Shared Training DB → User Upload Files → Submission under review (last resort) +**RAG Offload Priority — Validator:** Shared Training DB → User Upload Files (submission under review is always direct injected) ## Further RAG Specifications @@ -62,7 +62,7 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training **Stage C — Reranking + MMR**: Blend vector (60%) + BM25 (40%); MMR λ=0.8 (80% relevance, 20% diversity); removes near-duplicates (similarity > 0.85); hard cap at context budget. -**Stage D — Packing + Compression**: Assembles evidence with headers; priority: document → section → relevance. Compresses only if over budget (preserves entities, numbers, dates). Returns `ContextPack` with evidence tracking. +**Stage D — Packing**: Assembles evidence with headers; priority: document → section → relevance. Packs chunks incrementally until budget is reached (no compression — disabled as unreliable). Skips chunks from `exclude_sources` (content already direct-injected in prompt). Returns `ContextPack` with evidence tracking. --- @@ -70,7 +70,7 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training **User Files (Permanent Cache)**: Pre-generates all 4 configs; NEVER evicted; supports submitter chunk size cycling. -**Dynamic Files (Re-Chunked on Update)**: Shared training DB, rejection logs, outline, paper. 
Single config (current submitter cycle or validator constant). LRU eviction when > 10000 documents. +**Dynamic Files (Re-Chunked on Update)**: Shared training DB, rejection logs, outline, paper. Single config (current submitter cycle or validator constant). LRU eviction when > 10000 documents. Per-size chunk cap: oldest non-permanent chunks trimmed when any bucket exceeds 10,000 chunks. --- @@ -82,7 +82,7 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training **Re-Chunking Triggers:** -*Aggregator* (immediate after each acceptance): Acquires global RAG lock → incrementally adds only new submission → releases lock. New chunks: `rag_shared_training_update_{chunk_size}`. Historical chunks accumulate (not removed). MAX_CHUNKS_PER_SIZE = 10,000. +*Aggregator* (immediate after each acceptance): Acquires global RAG lock → incrementally adds only new submission → releases lock. New chunks: `rag_shared_training_update_{chunk_size}`. Historical chunks accumulate (not removed). `max_chunks_per_size = 10,000` enforced per bucket after each add; oldest non-permanent chunks trimmed on overflow. *Compiler* (every 10 aggregator acceptances): Monitors count every 30s → acquires global RAG lock → removes old chunks, re-adds entire file with all 4 configs → releases lock. @@ -104,16 +104,26 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training **Overflow handling**: User prompt always direct injected; if exceeds `context_window - minimum_RAG_allocation`: HALT with error. Content too large: offload to RAG. Still doesn't fit: compress (NEVER truncate). +**Source Exclusion (anti-duplication)**: `rag_manager.retrieve(exclude_sources=[...])` filters chunks from named sources during Stage D packing. Callers pass source names of content already direct-injected so RAG budget goes entirely to non-duplicated content. 
+ +| Mode | Excluded Sources | Reason | +|---|---|---| +| Compiler construction | `compiler_outline.txt`, `compiler_paper.txt`, brainstorm source (when direct-injected) | All three always direct-injected in construction prompts | +| Compiler outline_update | `compiler_outline.txt`, `compiler_paper.txt` | Both direct-injected in outline update prompts | +| Compiler rigor | `compiler_outline.txt` | Outline always direct-injected; paper intentionally RAG'd (smaller context) | +| Aggregator submitter/validator | Direct-injected user file names + direct-injected shared-training sources (current training file + `rag_shared_training_update_*`) | Prevents RAG returning chunks already in direct context when only some content is offloaded | +| Aggregator cleanup review | Same as above, when full submissions DB is direct-injected | Prevents cleanup RAG evidence from repeating already-injected submissions | + --- ## Memory Management **Cache limits:** - Query rewrite cache: 500 entries, 30-min TTL -- Embedding cache: 500 entries, LRU eviction - BM25 cache: 1000 entries, 1-hour TTL - Context pack cache: 300 entries - Document LRU: removes oldest non-permanent document when > 10000 docs +- Per-size chunk cap: 10,000 chunks per size bucket; oldest non-permanent trimmed on overflow; embeddings nulled before removal **Training data limits**: No shared DB size limit. Local rejections: max 5 per submitter. Observability: 1000 retrieval history, 50 gating events, 100 hard negatives. @@ -143,6 +153,25 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training --- +## Agents Intentionally Without RAG + +These agents use ONLY direct injection (no RAG fallback) by design. Each operates on compact metadata summaries where RAG is unnecessary. Documented in each file's module docstring. 
+ +| Agent | Inputs | Why No RAG | +|---|---|---| +| Topic selector | Brainstorm metadata, paper titles/abstracts | Strategic "what to work on" decision — summaries suffice; abstracts truncated as overflow fallback | +| Topic validator | Same as topic selector | Validates strategic decision, same compact metadata | +| Paper title selector | Brainstorm summary, existing paper titles/abstracts | Title selection needs topic overview, not full content | +| Paper redundancy checker | Paper titles/abstracts | Redundancy detected at abstract level, not full-content comparison | +| Answer format selector | Certainty assessment, paper summaries | Short-form vs long-form is a structural decision, not content-deep | +| Volume organizer | Paper summaries, certainty assessment, volume state | Chapter ordering uses paper-level metadata, not full content | +| Compiler review mode | Outline + paper only | Evaluates paper on its own merits without external source bias | +| Brainstorm continuation | Brainstorm summary, prior paper titles/abstracts | "Write another or move on" uses summary, not full DB | + +**Known oversight**: Certainty assessor Step 2 drops expanded papers when they don't fit instead of RAG'ing them. Should use RAG fallback like reference_selector does. + +--- + ## Critical Invariants 1. User prompt and JSON context NEVER RAG'd — always direct injected (Parts 1, 2, 3) @@ -156,3 +185,7 @@ User-uploaded files: pre-generate ALL 4 configurations. Dynamic files (training 9. Contradiction check pre-acceptance 10. Autonomous mode prompt validation before all LLM calls 11. Read operations retry on HNSW index errors (3 attempts, 0.5s → 1s → 2s backoff) +12. Document count tracks unique source names — repeated `add_text`/`add_document` with same source name does not inflate count +13. Per-size chunk cap (`max_chunks_per_size`) enforced after every add — prevents unbounded in-memory embedding growth +14. 
Agents that use only metadata summaries (topic selector, title selector, redundancy checker, etc.) intentionally skip RAG — see "Agents Intentionally Without RAG" table above +15. If content is already direct-injected, it must NOT also appear in RAG retrieval results — no duplication diff --git a/.cursor/rules/workflow-runtime-updates.mdc b/.cursor/rules/workflow-runtime-updates.mdc new file mode 100644 index 0000000..116fcae --- /dev/null +++ b/.cursor/rules/workflow-runtime-updates.mdc @@ -0,0 +1,11 @@ +--- +alwaysApply: true +--- + +## Workflow Runtime Updates + +- Aggregator, Compiler, and Autonomous Research are mutually exclusive runtime modes. Only one workflow mode may be active at a time, and starting any mode while another is running must be blocked. +- This rule explicitly supersedes any older wording in `part-1-and-part-2-cointeraction-architecture.mdc` that describes Aggregator and Compiler as concurrently runnable. + +- Compiler critique skip and autonomous critique skip both support pre-emptive use during active paper-writing. If critique is already active, the skip happens immediately. If critique has not started yet, the skip is queued and auto-applies when critique is reached. +- This rule explicitly supersedes any older wording in `part-2-compiler-tool-design-specification.mdc` that describes compiler critique skip as active-critique-only. diff --git a/Press to Launch MOTO.bat b/Press to Launch MOTO.bat index 77e2692..52710c8 100644 --- a/Press to Launch MOTO.bat +++ b/Press to Launch MOTO.bat @@ -1,5 +1,5 @@ @echo off -setlocal enabledelayedexpansion +setlocal cls echo ================================================================ @@ -83,7 +83,8 @@ REM ================================================================ echo [4/8] Installing Python dependencies... echo This may take a few minutes if this is your first time... echo. 
-pip install -r requirements.txt +python -m pip install --upgrade pip >nul 2>&1 +pip install --upgrade -r requirements.txt if errorlevel 1 ( echo. echo ============================================================ diff --git a/README.md b/README.md index 4ad799c..a737b0a 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,24 @@ # MOTO - S.T.E.M. Mathematics Variant # Autonomous AI/ASI Deep Research Harness -**Version: 1.0.4** +**Version: 1.0.5** [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) [![Node.js 16+](https://img.shields.io/badge/node-16+-green.svg)](https://nodejs.org/) -**An autonomous AI or ASI research system that generates novel mathematical research papers through multi-agent aggregation and compilation. Useful for any discipline that has an interest in novel math: physicists, engineers, mathematicians, chemists, etc. This harness can easily be modified for other research topics such as general academic research, chat bots, niche research, robotics, or anything requiring useful creative output and general autonomy. MOTO's novel brainstorming and rejection/validation stage allows autonomous long-term runtime without user intervention - if desired, research can be conducted for days or weeks without user input.** +**A breakthrough in AI and automated theorem generation. An autonomous AI/ASI research system that generates novel and publication-worthy research papers autonomously, powered by Intrafere Research Group's new ASI discovery of [Top-P Exploration Through Structured Brainstorming & Validated Feedback](https://intrafere.com/structured-brainstorming-validated-feedback/). 
Top-P exploration changes how we look at AI weights: a specific combination of reiterative brainstorming, validation, feedback, and pruning allows for superintelligence exploration and data extraction from nearly any combination of AI models. This is useful for any discipline with an interest in creative and novel solution generation for mathematics: physicists, engineers, mathematicians, chemists, etc. This harness can also easily be modified for other research topics such as general academic research, chat bots, niche research, robotics, or anything requiring creative output and/or general autonomy. MOTO's novel brainstorming and rejection/validation stage allows autonomous long-term runtime without user intervention — if desired, research can be conducted for days or weeks without user input.** -The differentiator that allows us to call this early artificial superintelligence is the brainstorming mechanism when considered in context with how generative transformers (GPT LLMs) operate. Brainstorming may sound trivial; however, transformers predict what tokens will come next. If you provide the transformer with a bunch of previous ideas that it had, then it is more likely to produce something new that it hasn't said before. MOTO maintains and "cleans" or "purges" its brainstorm database as it runs. Once a later idea makes earlier ones redundant, the redundant idea is then removed from the database - this allows for a type of recursive improvement. Once brainstorming is complete, MOTO writes the research paper about its solution. This step then repeats, now the papers become a new "tier 2" brainstorm. Previous papers are referenced in future brainstorms and future papers. This set of tier 2 papers is another higher order brainstorm-like database, which also cleans/prunes itself of newly discovered incorrect or redundant papers just like the tier 1 short-hand idea brainstorm database does for each paper. 
A 3rd tier, slightly more complex mechanism exists for the final tier 3 answer generation that allows for the book-length volumes. +### The Core Discovery: Top-P Exploration (Solution Basin Aggregation) + +MOTO is built on a [key insight](https://intrafere.com/structured-brainstorming-validated-feedback/) about how generative transformers operate: **transformers predict what tokens come next, so providing them with their own prior ideas enables deeper probing of the solution space**. This is called **solution basin aggregation** — each brainstorming pass explores a richer, more informed landscape, and the cross-recombination of "mined" knowledge compounds to create new insights that do not exist from the model's training alone. Intrafere considers this the mechanism that produces [ASI-like results](https://intrafere.com/structured-brainstorming-validated-feedback/) in practice. MOTO essentially "mines" creativity from a transformer's knowledge set, and this compounding effect is what differentiates it from traditional single-pass AI. + +The brainstorming phase runs **multiple submitters in parallel**, each independently exploring the solution space, funneled into a **single bottleneck validator** — a completely separate model instance whose only job is to decide whether each submission genuinely advances the knowledge base. This architectural separation between creative exploration and critical evaluation mitigates the hallucination loops and drift that plague single-model autonomous agents. Every rejection carries specific feedback that steers the next round of exploration, so failure is never wasted. Iterative pruning continuously removes entries that become redundant as stronger ideas emerge, producing an ever-denser, self-refining knowledge base. [View the learning curve data](https://intrafere.com/motos-brainstorming-potential-data/) for empirical evidence of this approach. 
+ +### How Research Compounds Across Tiers + +Once a brainstorm is sufficiently explored, MOTO writes a research paper from it. This step then repeats — papers become a new "Tier 2" brainstorm. Previous papers are referenced in future brainstorms and future papers. This set of Tier 2 papers is another higher-order brainstorm-like database, which also self-prunes newly discovered incorrect or redundant papers just like the Tier 1 short-hand idea brainstorm does. A third tier generates the final answer, capable of producing book-length volumes. + +MOTO may produce many brilliant papers as it runs; these intermediate papers are answers that rival traditional paid cloud deep research. As the user, observe MOTO as often or as little as you'd like — skip its autonomy and force it into final answer generation, or stop it early and select one of its highly creative pre-final answer papers. If the operator allows, let MOTO run for many hours and produce a final answer from its experimental mode. MOTO autonomously decides whether to output a short-form answer or collect existing papers into a long-form academic volume. With models over 131,000 token context limits, the harness easily produces final volumes exceeding 40,000 words autonomously. The built-in "critique" feature allows the user to direct-inject the full volume into nearly any AI model of their choice for evaluation. MOTO writes papers in reverse order — body first, conclusion second, introduction last — to avoid constraining the creative process with premature structural commitments. MOTO is an experimental system; the AI(s) are producing this content partially unguided and all papers should be judged with extreme scrutiny. MOTO may produce many brilliant papers as it runs, these start-up papers are answers that rival traditional paid cloud deep research functions. 
As the user, observe MOTO as often or as little as you'd like - skip its autonomy and force it into final answer generation, or stop it early and select one of its highly creative pre-final answer papers. If the operator allows, let MOTO run for many hours and produce a final answer from its experimental mode, a final answer beyond the growing collection of related papers. In this mode, we have found quality can vary - however that is because MOTO is capable of autonomously deciding if it should output a shortform answer, like traditional cloud deep research - or whether to collect existing pre-final answer papers and write a long-form academic volume. With models over 131,000 token context limits, the harness is easily capable of producing final volumes with over 40,000 words, and MOTO produces this book-length content autonomously. It should be noted that this long-form answer is often so detailed that for any AI critique it requires the system to fully direct-inject the paper content. Providing an attempt at a novel 40,000 academic volume to a cloud agent using RAG (summarization-like techniques) may provide mixed results. MOTO strives to direct inject all content where possible. The built in “critique” feature allows the user to direct inject the full 40,000+ word volume into nearly any AI model of their choice. If the system decides to do a long form answer it will collect any relevant papers it has written, take only the best and related ones, organize them into chapters, write any gap papers, write the conclusion, and then lastly write the introduction chapter. This reverse order, body first, conclusion second, then introduction portion(s) last is crucial to allow MOTO the ability to be creative as it write. The traditional introduction -> body -> conclusion writing style ensures we limit any non-Markovian constraints on the creative process. 
In other words, if the system wrote its introduction first, the body paragraphs are locked in and bound to the rules the introduction set. Writing the body section first gives the system greater freedom to discover as it writes as it is not bound by a detailed introduction. MOTO is an experimental system, the AI(s) are producing this content partially unguided and all papers should be judged with extreme scrutiny. @@ -45,15 +55,20 @@ Before installation, you need: - **Load the LM Studio RAG agent [optional but HIGHLY recommended for much faster outputs/answers]**: Load the embedding model `nomic-ai/nomic-embed-text-v1.5` in your LM studio "Developer" tab (server tab) (search for "nomic-ai/nomic-embed-text-v1.5" to download it in the LM studio downloads center). Please note: you may need to enable "Power User" or "Developer" to see this developer tab - this server will let you load the amount and capacity of simultaneous models that your PC will suport. In this develop tab is where you load both your nomic-ai embedding agent and any optional local hosted agents you want to use in the program (I.e. GPT OSS 20b, DeepSeek 32B, etc). **If you do not not download LM studio and enable the Nomic agent the system will run much slower and cost a slightly more due to having to use the paid service OpenRouter for RAG calls.** - Start the local server (port 1234) 4. **If using cloud AI - Get an OpenRouter API key**: Sign up at OpenRouter.ai and get a paid or free API key to use the most powerful cloud models available from your favorite providers. OpenRouter may also offer a certain amount of free API calls per day with your account key. When you download the MOTO deep research harness, you can see which models are free by checking the "show only free models" check box(es) in the MOTO app settings. +5. **On first startup, pick your provider path**: After you acknowledge the disclaimer, MOTO will prompt you to either enter an OpenRouter key or confirm that LM Studio is running. 
If you save an OpenRouter key there, the recommended default autonomous profile is applied immediately so you can open Settings and see it already selected. ### Installation #### Windows (One-Click Launcher) 1. Clone or download this repository -2. Start LM Studio and load your models and "nomic-embed-text-v1.5" agent **and/or** enter OpenRouter API key +2. Start LM Studio and load your models and "nomic-embed-text-v1.5" agent **and/or** have your OpenRouter API key ready 3. **Double-click `Press to Launch MOTO.bat`** -4. The launcher will: +4. After acknowledging the disclaimer, choose one of the startup setup paths: + - Enter your OpenRouter API key + - Confirm that LM Studio is already running with a loaded model + - Then open Settings to keep the recommended profile or switch to your saved team profile / another default profile +5. The launcher will: - Check all prerequisites - Install Python and Node.js dependencies automatically - Create necessary directories @@ -338,6 +353,8 @@ All content generated by this system is for informational purposes only. Papers ## 🔗 Links - **Website**: https://intrafere.com +- **Top-P Exploration (ASI Discovery)**: https://intrafere.com/structured-brainstorming-validated-feedback/ +- **Learning Curve Data**: https://intrafere.com/motos-brainstorming-potential-data/ - **Program Info**: https://intrafere.com/moto-autonomous-home-ai/ - **News & Updates**: https://intrafere.com/moto-news/ - **Donate**: https://intrafere.com/donate/ diff --git a/_moto_internal_launcher.ps1 b/_moto_internal_launcher.ps1 index beb0146..8e21bba 100644 --- a/_moto_internal_launcher.ps1 +++ b/_moto_internal_launcher.ps1 @@ -28,7 +28,7 @@ try { } # Check for Python - Write-Host "[1/6] Checking Python installation..." -ForegroundColor Yellow + Write-Host "[1/8] Checking Python installation..." 
-ForegroundColor Yellow if (-not (Test-Command python)) { Write-Host "" Write-Host "============================================================" -ForegroundColor Red @@ -46,7 +46,7 @@ try { Write-Host "" # Check for Node.js - Write-Host "[2/6] Checking Node.js installation..." -ForegroundColor Yellow + Write-Host "[2/8] Checking Node.js installation..." -ForegroundColor Yellow if (-not (Test-Command node)) { Write-Host "" Write-Host "============================================================" -ForegroundColor Red @@ -63,8 +63,8 @@ try { Write-Host "npm: $npmVersion" -ForegroundColor Green Write-Host "" - # Create necessary directories - Write-Host "[3/6] Creating necessary directories..." -ForegroundColor Yellow + # Create necessary directories & clean ChromaDB + Write-Host "[3/8] Creating necessary directories..." -ForegroundColor Yellow $directories = @( "backend\data", "backend\data\user_uploads", @@ -77,35 +77,38 @@ try { } } Write-Host "Directories ready!" -ForegroundColor Green + + # Clean ChromaDB on startup to prevent corruption issues + $chromaPath = "backend\data\chroma_db" + if (Test-Path $chromaPath) { + Write-Host "Cleaning ChromaDB database..." -ForegroundColor Yellow + Remove-Item -Path $chromaPath -Recurse -Force + Write-Host "ChromaDB cleaned!" -ForegroundColor Green + } Write-Host "" - # Check/Install Python dependencies - Write-Host "[4/6] Checking Python dependencies..." -ForegroundColor Yellow - $pipList = pip list 2>&1 - if ($pipList -notmatch "fastapi") { - Write-Host "Installing Python dependencies..." -ForegroundColor Yellow - Write-Host "This may take a few minutes..." -ForegroundColor Yellow + # Install/Update Python dependencies + Write-Host "[4/8] Installing Python dependencies..." -ForegroundColor Yellow + Write-Host "Upgrading pip and checking packages..." 
-ForegroundColor Yellow + Write-Host "" + python -m pip install --upgrade pip 2>&1 | Out-Null + pip install --upgrade -r requirements.txt + if ($LASTEXITCODE -ne 0) { Write-Host "" - pip install -r requirements.txt - if ($LASTEXITCODE -ne 0) { - Write-Host "" - Write-Host "============================================================" -ForegroundColor Red - Write-Host "ERROR: Failed to install Python dependencies" -ForegroundColor Red - Write-Host "============================================================" -ForegroundColor Red - Write-Host "" - Write-Host "Please check:" -ForegroundColor Yellow - Write-Host "- Internet connection is working" -ForegroundColor Yellow - Write-Host "- You have permission to install packages" -ForegroundColor Yellow - Exit-WithPause -ExitCode 1 - } - Write-Host "Python dependencies installed successfully" -ForegroundColor Green - } else { - Write-Host "Python dependencies already installed" -ForegroundColor Green + Write-Host "============================================================" -ForegroundColor Red + Write-Host "ERROR: Failed to install Python dependencies" -ForegroundColor Red + Write-Host "============================================================" -ForegroundColor Red + Write-Host "" + Write-Host "Please check:" -ForegroundColor Yellow + Write-Host "- Internet connection is working" -ForegroundColor Yellow + Write-Host "- You have permission to install packages" -ForegroundColor Yellow + Exit-WithPause -ExitCode 1 } + Write-Host "Python dependencies up to date" -ForegroundColor Green Write-Host "" # Install Playwright Chromium browser (one-time ~150MB download for PDF generation) - Write-Host "[4b/6] Installing Playwright Chromium browser for PDF generation..." -ForegroundColor Yellow + Write-Host "[4b/8] Installing Playwright Chromium browser for PDF generation..." -ForegroundColor Yellow Write-Host "This is a one-time download (~150MB) and may take a few minutes..." 
-ForegroundColor Yellow Write-Host "" python -m playwright install chromium 2>&1 @@ -121,7 +124,7 @@ try { Write-Host "" # Check/Install Node.js dependencies - Write-Host "[5/6] Checking Node.js dependencies..." -ForegroundColor Yellow + Write-Host "[5/8] Checking Node.js dependencies..." -ForegroundColor Yellow if (-not (Test-Path "frontend")) { Write-Host "" Write-Host "============================================================" -ForegroundColor Red @@ -160,7 +163,7 @@ try { Write-Host "" # Check for LM Studio (optional - OpenRouter is an alternative) - Write-Host "[6/6] Checking LM Studio..." -ForegroundColor Yellow + Write-Host "[6/8] Checking LM Studio..." -ForegroundColor Yellow Write-Host "" # Check if LM Studio is responding @@ -194,7 +197,7 @@ try { Write-Host "" # Clean up any existing processes on ports 8000 and 5173 - Write-Host "[7/7] Cleaning up existing processes on ports 8000 and 5173..." -ForegroundColor Yellow + Write-Host "[7/8] Cleaning up existing processes on ports 8000 and 5173..." -ForegroundColor Yellow Write-Host "" # Kill processes on port 8000 @@ -247,6 +250,8 @@ try { Write-Host "" # Start the system + Write-Host "[8/8] Starting services..." -ForegroundColor Yellow + Write-Host "" Write-Host "================================================================" -ForegroundColor Cyan Write-Host " All checks passed! Starting system..." 
-ForegroundColor Cyan Write-Host "================================================================" -ForegroundColor Cyan diff --git a/backend/aggregator/agents/submitter.py b/backend/aggregator/agents/submitter.py index f72e613..de2e5b3 100644 --- a/backend/aggregator/agents/submitter.py +++ b/backend/aggregator/agents/submitter.py @@ -273,9 +273,17 @@ async def _generate_submission(self) -> Optional[Submission]: self.task_tracking_callback("completed", task_id) return None # Return None instead of crashing + except FreeModelExhaustedError: + raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise FreeModelExhaustedError(str(e), soonest_retry=None) + logger.error(f"Submitter {self.submitter_id}: Unexpected error during completion: {e}") + if self.task_tracking_callback: + self.task_tracking_callback("completed", task_id) + return None except Exception as e: logger.error(f"Submitter {self.submitter_id}: Unexpected error during completion: {e}") - # Notify task completed (failed but still completed) if self.task_tracking_callback: self.task_tracking_callback("completed", task_id) return None diff --git a/backend/aggregator/core/context_allocator.py b/backend/aggregator/core/context_allocator.py index 6e1952a..36d8178 100644 --- a/backend/aggregator/core/context_allocator.py +++ b/backend/aggregator/core/context_allocator.py @@ -4,6 +4,7 @@ """ from typing import Dict, List, Optional import logging +from pathlib import Path from backend.shared.config import rag_config from backend.shared.models import ContextPack @@ -45,6 +46,32 @@ def set_context_windows(self, submitter_context: int, validator_context: int, if validator_max_output is not None: self.validator_max_output_tokens = validator_max_output logger.info(f"Context windows updated - Submitter: {submitter_context}, Validator: {validator_context}") + + def _get_shared_training_rag_sources(self) -> List[str]: + """ + Get RAG source names that map to shared-training content. 
+ + Used to exclude shared-training chunks from RAG when the same + shared-training content is already direct-injected into the prompt. + """ + sources: List[str] = [] + + # Current shared-training file source (manual mode: rag_shared_training.txt, + # autonomous mode: brainstorm_.txt) + try: + from backend.aggregator.memory.shared_training import shared_training_memory + current_source = Path(shared_training_memory.file_path).name + if current_source: + sources.append(current_source) + except Exception as e: + logger.debug(f"Could not resolve shared-training source name for exclusion: {e}") + + # Incremental re-RAG sources used by aggregator background updates + for chunk_size in rag_config.submitter_chunk_intervals: + sources.append(f"rag_shared_training_update_{chunk_size}") + + # De-dup while preserving insertion order + return list(dict.fromkeys(sources)) async def allocate_submitter_context( self, @@ -184,6 +211,15 @@ async def allocate_submitter_context( # Perform RAG retrieval ONLY if content was offloaded rag_context = None if any([needs_shared_training_rag, needs_local_training_rag, needs_rejection_log_rag, needs_user_files_rag]): + # Build exclusion list: sources that were direct-injected should not appear in RAG + exclude_sources = [] + if not needs_shared_training_rag and shared_training_content: + exclude_sources.extend(self._get_shared_training_rag_sources()) + if not needs_user_files_rag and user_files_content: + exclude_sources.extend(user_files_content.keys()) + if exclude_sources: + exclude_sources = list(dict.fromkeys(exclude_sources)) + # FIXED: Calculate RAG budget from REMAINING space after direct injection # This ensures we maximize context usage without exceeding limits direct_content_temp = "\n\n".join(direct_parts) @@ -216,7 +252,8 @@ async def allocate_submitter_context( rag_context = await rag_manager.retrieve( query=user_prompt, chunk_size=chunk_size, # Cycles: 256→512→768→1024 - max_tokens=rag_max_tokens + max_tokens=rag_max_tokens, + 
exclude_sources=exclude_sources if exclude_sources else None ) if rag_context and rag_context.text: @@ -343,6 +380,15 @@ async def allocate_validator_context( # Perform RAG retrieval ONLY if content was offloaded rag_context = None if needs_shared_training_rag or needs_user_files_rag: + # Build exclusion list: sources that were direct-injected should not appear in RAG + exclude_sources = [] + if not needs_shared_training_rag and shared_training_content: + exclude_sources.extend(self._get_shared_training_rag_sources()) + if not needs_user_files_rag and user_files_content: + exclude_sources.extend(user_files_content.keys()) + if exclude_sources: + exclude_sources = list(dict.fromkeys(exclude_sources)) + # FIXED: Calculate RAG budget from REMAINING space after direct injection # This ensures we maximize context usage without exceeding limits direct_content_temp = "\n\n".join(direct_parts) @@ -375,7 +421,8 @@ async def allocate_validator_context( rag_context = await rag_manager.retrieve( query=user_prompt, chunk_size=chunk_size, # Always 512 for validator - max_tokens=rag_max_tokens + max_tokens=rag_max_tokens, + exclude_sources=exclude_sources if exclude_sources else None ) if rag_context and rag_context.text: @@ -503,6 +550,15 @@ async def allocate_cleanup_review_context( # Perform RAG retrieval if content was offloaded rag_context = None if needs_submissions_rag or needs_user_files_rag: + # Build exclusion list: sources that were direct-injected should not appear in RAG + exclude_sources = [] + if not needs_submissions_rag and all_submissions_formatted: + exclude_sources.extend(self._get_shared_training_rag_sources()) + if not needs_user_files_rag and user_files_content: + exclude_sources.extend(user_files_content.keys()) + if exclude_sources: + exclude_sources = list(dict.fromkeys(exclude_sources)) + # Calculate RAG budget from remaining space direct_content_temp = "\n\n".join(direct_parts) direct_content_tokens = count_tokens(direct_content_temp) @@ -520,12 
+576,11 @@ async def allocate_cleanup_review_context( f"direct_content={direct_content_tokens}" ) - # Use the user prompt as query for RAG - this will retrieve relevant submissions - # For cleanup, we want to find similar/redundant content rag_context = await rag_manager.retrieve( query=user_prompt, chunk_size=512, # Use validator's standard chunk size - max_tokens=rag_max_tokens + max_tokens=rag_max_tokens, + exclude_sources=exclude_sources if exclude_sources else None ) if rag_context and rag_context.text: diff --git a/backend/aggregator/core/coordinator.py b/backend/aggregator/core/coordinator.py index 3c640ab..3767c89 100644 --- a/backend/aggregator/core/coordinator.py +++ b/backend/aggregator/core/coordinator.py @@ -80,6 +80,9 @@ def __init__(self): # Submitter pause control (queue overflow prevention) self.should_pause_submitters = False # Flag to pause submitters when queue >= 10 + + # Cleanup review toggle (disabled for short-lived mini-brainstorm phases) + self.enable_cleanup_review = True async def _load_stats(self) -> None: """Load persisted stats from file.""" @@ -128,7 +131,8 @@ async def initialize( validator_max_tokens: Optional[int] = None, validator_provider: str = "lm_studio", validator_openrouter_provider: Optional[str] = None, - validator_lm_studio_fallback: Optional[str] = None + validator_lm_studio_fallback: Optional[str] = None, + enable_cleanup_review: bool = True ) -> None: """ Initialize the coordinator with configuration. 
@@ -147,6 +151,9 @@ async def initialize( """ logger.info("Initializing coordinator...") + # Store cleanup review toggle + self.enable_cleanup_review = enable_cleanup_review + # Validate submitter count num_submitters = len(submitter_configs) if not (system_config.min_submitters <= num_submitters <= system_config.max_submitters): @@ -793,7 +800,7 @@ async def _handle_acceptance(self, submission: Submission, result: ValidationRes await self._save_stats() # Trigger cleanup review every 7 acceptances - if self.total_acceptances % 7 == 0 and self.total_acceptances > 0: + if self.enable_cleanup_review and self.total_acceptances % 7 == 0 and self.total_acceptances > 0: await self._perform_cleanup_review() async def _handle_rejection(self, submission: Submission, result: ValidationResult) -> None: @@ -938,7 +945,7 @@ async def _perform_cleanup_review(self) -> None: # Phase 4: Execute the removal logger.info(f"CLEANUP DEBUG: >>> PHASE 4: Executing removal of submission #{submission_number}...") - removal_success = await shared_training_memory.remove_submission(submission_number) + removal_success = await shared_training_memory.remove_submission(submission_number, trigger_rechunk=False) logger.info(f"CLEANUP DEBUG: <<< PHASE 4 Complete: removal_success={removal_success}") if removal_success: @@ -956,6 +963,9 @@ async def _perform_cleanup_review(self) -> None: "total_removals": self.removals_executed }) + # Full RAG rebuild so deleted content is no longer retrievable + await self._rebuild_shared_training_rag_after_cleanup() + # Log key event to persistent log await event_log.add_event( "cleanup_submission_removed", @@ -1079,6 +1089,43 @@ async def _rechunk_training_data(self) -> None: # ALWAYS RELEASE LOCK rag_operation_lock.release() + async def _rebuild_shared_training_rag_after_cleanup(self) -> None: + """Full RAG rebuild of shared-training content after a cleanup removal. + + The normal incremental rechunk path is append-only and cannot remove + deleted content from RAG. 
After a prune we must drop all shared-training + RAG sources and re-add the current (post-removal) file so retrieval + results stay consistent with the live database. + """ + current_path = Path(shared_training_memory.file_path) + current_count = await shared_training_memory.get_insights_count() + + await rag_operation_lock.acquire("Aggregator cleanup full re-rag") + try: + # Collect every source name that could contain shared-training chunks + candidate_sources = [current_path.name, current_path.with_suffix(".tmp").name] + for size in rag_config.submitter_chunk_intervals: + candidate_sources.append(f"rag_shared_training_update_{size}") + + for source in dict.fromkeys(candidate_sources): + if source in rag_manager.document_access_order: + await rag_manager.remove_document(source) + + if current_count > 0 and current_path.exists(): + await rag_manager.add_document( + str(current_path), + chunk_sizes=rag_config.submitter_chunk_intervals, + is_user_file=False, + ) + + await shared_training_memory.mark_submissions_ragged(current_count) + logger.info(f"Cleanup full re-RAG complete: {current_count} live submissions re-indexed") + except Exception as e: + logger.error(f"Cleanup full re-RAG failed: {e}", exc_info=True) + raise + finally: + rag_operation_lock.release() + async def get_status(self) -> SystemStatus: """Get current system status.""" queue_size = await queue_manager.size() diff --git a/backend/aggregator/core/rag_manager.py b/backend/aggregator/core/rag_manager.py index 766cd6f..156a046 100644 --- a/backend/aggregator/core/rag_manager.py +++ b/backend/aggregator/core/rag_manager.py @@ -91,10 +91,11 @@ async def add_document( for chunk_size, chunks in chunks_by_size.items(): await self._add_chunks(chunks, chunk_size) - # Track document + # Track document (only increment count for genuinely new sources) source_name = Path(file_path).name - self.document_count += 1 - self.document_access_order[source_name] = time.time() # LRU tracking + if source_name not in 
self.document_access_order: + self.document_count += 1 + self.document_access_order[source_name] = time.time() if is_user_file: self.permanent_documents.add(source_name) @@ -102,6 +103,9 @@ async def add_document( if self.document_count > rag_config.max_documents: await self._evict_lru_document() + # Enforce per-size chunk cap + await self._enforce_chunk_cap() + logger.info(f"Added document: {file_path}") except Exception as e: @@ -137,9 +141,10 @@ async def add_text( for chunk_size, chunks in chunks_by_size.items(): await self._add_chunks(chunks, chunk_size) - # Track document - self.document_count += 1 - self.document_access_order[source_name] = time.time() # LRU tracking + # Track document (only increment count for genuinely new sources) + if source_name not in self.document_access_order: + self.document_count += 1 + self.document_access_order[source_name] = time.time() if is_permanent: self.permanent_documents.add(source_name) @@ -147,6 +152,9 @@ async def add_text( if self.document_count > rag_config.max_documents: await self._evict_lru_document() + # Enforce per-size chunk cap + await self._enforce_chunk_cap() + logger.info(f"Added text: {source_name}") except Exception as e: @@ -157,7 +165,8 @@ async def retrieve( self, query: str, chunk_size: int = 512, - max_tokens: int = None + max_tokens: int = None, + exclude_sources: Optional[List[str]] = None ) -> ContextPack: """ 4-stage retrieval pipeline. 
@@ -166,6 +175,7 @@ async def retrieve( query: Search query chunk_size: Chunk size to retrieve from max_tokens: Maximum tokens in result + exclude_sources: Source names to skip during packing (already direct-injected) Returns: ContextPack with retrieved context @@ -189,7 +199,9 @@ async def retrieve( # Stage D: Packing + Compression logger.debug(f"RAG Stage 4/4: Packing and compression (max_tokens={max_tokens})") - context_pack = await self._pack_and_compress(ranked_chunks, query, max_tokens) + if exclude_sources: + logger.info(f"RAG Stage 4/4: Excluding sources already direct-injected: {exclude_sources}") + context_pack = await self._pack_and_compress(ranked_chunks, query, max_tokens, exclude_sources) logger.debug(f"RAG Stage 4/4 complete: Packed {len(context_pack.evidence)} evidence items, coverage={context_pack.coverage:.2f}") return context_pack @@ -309,11 +321,9 @@ async def _vector_search( if not chunks: return [] + query_embeddings = await api_client_manager.get_embeddings(queries) all_results = [] - for query in queries: - # Get query embedding - query_embedding = await api_client_manager.get_embeddings([query]) - + for query_embedding in query_embeddings: # Search with retry logic for transient HNSW errors during concurrent writes max_retries = 3 retry_delay = 0.5 # Start with 500ms delay @@ -322,7 +332,7 @@ async def _vector_search( for attempt in range(max_retries): try: results = collection.query( - query_embeddings=query_embedding, + query_embeddings=[query_embedding], n_results=min(rag_config.hybrid_recall_top_k, len(chunks)) ) break # Success - exit retry loop @@ -480,13 +490,16 @@ async def _pack_and_compress( self, chunks: List[DocumentChunk], query: str, - max_tokens: int + max_tokens: int, + exclude_sources: Optional[List[str]] = None ) -> ContextPack: """ Stage D: Pack chunks into ContextPack with strict token limit enforcement. CRITICAL: This function MUST NOT exceed max_tokens. We pack chunks incrementally until we hit the limit, then stop. 
Compression is NOT used because it's unreliable. + + Chunks from exclude_sources are skipped (already direct-injected in the prompt). """ if not chunks: return ContextPack( @@ -498,32 +511,43 @@ async def _pack_and_compress( needs_more_context=True ) + exclude_set = set(exclude_sources) if exclude_sources else set() + skipped_count = 0 + # Assemble evidence INCREMENTALLY until we hit max_tokens evidence = [] source_map = {} assembled_text = [] current_tokens = 0 + evidence_idx = 0 - for idx, chunk in enumerate(chunks, start=1): + for chunk in chunks: + # Skip chunks from excluded sources (already direct-injected) + if chunk.source_file in exclude_set: + skipped_count += 1 + continue + + evidence_idx += 1 + # Format this chunk's evidence entry - chunk_entry = f"[Evidence {idx} from {chunk.source_file}]\n{chunk.text}\n" + chunk_entry = f"[Evidence {evidence_idx} from {chunk.source_file}]\n{chunk.text}\n" chunk_tokens = count_tokens(chunk_entry) # Check if adding this chunk would exceed limit if current_tokens + chunk_tokens > max_tokens: # Stop here - we've hit the limit - logger.debug(f"RAG packing stopped at {idx-1}/{len(chunks)} chunks ({current_tokens} tokens, limit={max_tokens})") + logger.debug(f"RAG packing stopped at {evidence_idx-1} packed chunks ({current_tokens} tokens, limit={max_tokens})") break # Add this chunk evidence_entry = { - "id": idx, + "id": evidence_idx, "source": chunk.source_file, "text": chunk.text, "position": chunk.position } evidence.append(evidence_entry) - source_map[f"E{idx}"] = chunk.source_file + source_map[f"E{evidence_idx}"] = chunk.source_file assembled_text.append(chunk_entry) current_tokens += chunk_tokens @@ -531,6 +555,9 @@ async def _pack_and_compress( if chunk.source_file in self.document_access_order: self.document_access_order[chunk.source_file] = time.time() + if skipped_count > 0: + logger.info(f"RAG packing: Skipped {skipped_count} chunks from excluded sources (already direct-injected)") + full_text = 
"\n".join(assembled_text) token_count = current_tokens # We already counted during packing @@ -556,6 +583,38 @@ async def _pack_and_compress( needs_more_context=coverage < rag_config.coverage_threshold ) + async def _enforce_chunk_cap(self) -> None: + """Trim oldest non-permanent chunks when any size bucket exceeds max_chunks_per_size.""" + cap = rag_config.max_chunks_per_size + for chunk_size in rag_config.submitter_chunk_intervals: + chunks = self.chunks_by_size[chunk_size] + if len(chunks) <= cap: + continue + + overflow = len(chunks) - cap + evict_ids = [] + keep = [] + removed = 0 + + for chunk in chunks: + if removed < overflow and not chunk.is_permanent: + evict_ids.append(chunk.chunk_id) + chunk.embedding = None + removed += 1 + else: + keep.append(chunk) + + if evict_ids: + collection = self.collections[chunk_size] + try: + collection.delete(ids=evict_ids) + except Exception as e: + logger.error(f"ChromaDB delete during chunk cap enforcement (size={chunk_size}): {e}") + + self.chunks_by_size[chunk_size] = keep + self.bm25_index[chunk_size] = None + logger.info(f"Chunk cap enforced for size={chunk_size}: removed {len(evict_ids)} oldest non-permanent chunks ({len(keep)} remaining)") + async def _evict_lru_document(self) -> None: """Evict least recently used document (except permanent ones).""" # Find oldest non-permanent document @@ -585,6 +644,8 @@ async def _evict_lru_document(self) -> None: async def remove_document(self, source_name: str) -> None: """Remove a document from all collections.""" + was_tracked = source_name in self.document_access_order + for chunk_size in rag_config.submitter_chunk_intervals: # Remove from memory self.chunks_by_size[chunk_size] = [ @@ -602,7 +663,8 @@ async def remove_document(self, source_name: str) -> None: # Invalidate BM25 self.bm25_index[chunk_size] = None - self.document_count -= 1 + if was_tracked: + self.document_count = max(0, self.document_count - 1) # Clean up LRU tracking if source_name in 
self.document_access_order: @@ -664,6 +726,7 @@ def clear_all_documents(self) -> None: # Reset counters self.document_count = 0 self.permanent_documents.clear() + self.document_access_order.clear() if collection_errors: logger.warning(f"RAG cleared with {len(collection_errors)} non-critical warnings: {'; '.join(collection_errors)}") diff --git a/backend/aggregator/memory/shared_training.py b/backend/aggregator/memory/shared_training.py index d3e5499..5460d66 100644 --- a/backend/aggregator/memory/shared_training.py +++ b/backend/aggregator/memory/shared_training.py @@ -94,8 +94,9 @@ async def reload_insights_from_current_path(self) -> None: default=0 ) self.submission_count = max_number - # Set last_ragged to current count so new submissions start from here - self.last_ragged_submission_count = self.submission_count + # Use entry count (not max number) so post-prune gaps + # don't cause the next acceptance to be skipped from RAG + self.last_ragged_submission_count = len(self.insights) else: self.submission_count = 0 @@ -263,7 +264,7 @@ async def get_submission_content(self, submission_number: int) -> Optional[str]: return insight['content'] return None - async def remove_submission(self, submission_number: int) -> bool: + async def remove_submission(self, submission_number: int, trigger_rechunk: bool = True) -> bool: """ Remove a submission from the shared training database. @@ -272,6 +273,8 @@ async def remove_submission(self, submission_number: int) -> bool: Args: submission_number: The submission number to remove + trigger_rechunk: Whether to fire the incremental rechunk callback. + Set False when the caller will do a full RAG rebuild instead. 
Returns: True if submission was found and removed, False otherwise @@ -292,7 +295,7 @@ async def remove_submission(self, submission_number: int) -> bool: await self._save() # Trigger re-chunking callback to update RAG - if self.rechunk_callback: + if trigger_rechunk and self.rechunk_callback: try: logger.info(f"Triggering re-chunking callback after removal of submission #{submission_number}") await self.rechunk_callback() diff --git a/backend/aggregator/prompts/submitter_prompts.py b/backend/aggregator/prompts/submitter_prompts.py index 6a9f5bd..78ffd08 100644 --- a/backend/aggregator/prompts/submitter_prompts.py +++ b/backend/aggregator/prompts/submitter_prompts.py @@ -40,6 +40,8 @@ def get_submitter_system_prompt() -> str: YOUR TASK: Generate a novel mathematical insight that advances the user's goal. +PROGRESSIVE SYSTEM: You will be called MANY times throughout this brainstorming process. Each call should produce ONE deep, well-developed mathematical insight. Do not try to cover everything at once — focus on thoroughly developing a single avenue per submission with full rigor. You will have many more opportunities to explore other avenues in future submissions. + Focus on mathematical concepts, theorems, techniques, and proofs that may provide an avenue towards solving or understanding the mathematical problem in the prompt. Use all available resources including web search if available. 
WHAT MAKES A VALUABLE SUBMISSION - Consider: diff --git a/backend/api/main.py b/backend/api/main.py index 50469a5..4639098 100644 --- a/backend/api/main.py +++ b/backend/api/main.py @@ -77,6 +77,10 @@ async def lifespan(app: FastAPI): from backend.shared.boost_manager import boost_manager boost_manager.set_broadcast_callback(websocket.broadcast_event) + # Set API client manager broadcaster (token tracking, rate limits, fallbacks) + from backend.shared.api_client_manager import api_client_manager + api_client_manager.set_broadcast_callback(websocket.broadcast_event) + logger.info("ASI Aggregator System ready") yield @@ -94,7 +98,7 @@ async def lifespan(app: FastAPI): app = FastAPI( title="ASI Aggregator System", description="AI-powered aggregator with RAG and multi-agent validation", - version="1.0.4", + version="1.0.5", lifespan=lifespan ) @@ -117,7 +121,7 @@ async def root(): """Root endpoint.""" return { "name": "ASI Aggregator System", - "version": "1.0.4", + "version": "1.0.5", "status": "running" } diff --git a/backend/api/routes/aggregator.py b/backend/api/routes/aggregator.py index 9aae65e..50715b9 100644 --- a/backend/api/routes/aggregator.py +++ b/backend/api/routes/aggregator.py @@ -2,7 +2,7 @@ Aggregator API routes. 
""" from fastapi import APIRouter, HTTPException, UploadFile, File -from typing import List +from typing import List, Optional import logging from pathlib import Path import aiofiles @@ -10,19 +10,41 @@ from backend.shared.models import AggregatorStartRequest, SystemStatus, ModelInfo from backend.shared.lm_studio_client import lm_studio_client from backend.shared.config import system_config, rag_config +from backend.shared.token_tracker import token_tracker from backend.aggregator.core.coordinator import coordinator from backend.aggregator.core.context_allocator import context_allocator from backend.aggregator.memory.event_log import event_log +from backend.compiler.core.compiler_coordinator import compiler_coordinator +from backend.autonomous.core.autonomous_coordinator import autonomous_coordinator logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/aggregator", tags=["aggregator"]) +def _get_start_conflict() -> Optional[str]: + """Return a user-facing conflict message if another workflow is active.""" + if coordinator.is_running: + return "Aggregator is already running" + + if compiler_coordinator.is_running: + return "Cannot start Aggregator while Compiler is running. Stop Compiler first." + + autonomous_state = autonomous_coordinator.get_state() + if autonomous_state.is_running: + return "Cannot start Aggregator while Autonomous Research is running. Stop Autonomous Research first." 
+ + return None + + @router.post("/start") async def start_aggregator(request: AggregatorStartRequest): """Start the aggregator system.""" try: + conflict = _get_start_conflict() + if conflict: + raise HTTPException(status_code=400, detail=conflict) + # Validate submitter configs num_submitters = len(request.submitter_configs) if not (system_config.min_submitters <= num_submitters <= system_config.max_submitters): @@ -50,10 +72,10 @@ async def start_aggregator(request: AggregatorStartRequest): # Log submitter configurations for config in request.submitter_configs: label = "(Main Submitter)" if config.submitter_id == 1 else "" - logger.info( + logger.info( f"Submitter {config.submitter_id} {label}: model={config.model_id}, " f"context={config.context_window}, max_tokens={config.max_output_tokens}" - ) + ) logger.info( f"Validator: model={request.validator_model}, " f"context={request.validator_context_size}, max_tokens={request.validator_max_output_tokens}" @@ -74,6 +96,8 @@ async def start_aggregator(request: AggregatorStartRequest): ) # Start coordinator + token_tracker.reset() + token_tracker.start_timer() await coordinator.start() return { @@ -98,6 +122,7 @@ async def stop_aggregator(): """Stop the aggregator system.""" try: await coordinator.stop() + token_tracker.stop_timer() return {"status": "stopped", "message": "Aggregator system stopped"} except Exception as e: logger.error(f"Failed to stop aggregator: {e}") diff --git a/backend/api/routes/autonomous.py b/backend/api/routes/autonomous.py index 9f2a925..e101281 100644 --- a/backend/api/routes/autonomous.py +++ b/backend/api/routes/autonomous.py @@ -4,23 +4,380 @@ """ import asyncio import logging -from typing import Optional +import os +from pathlib import Path +from typing import Optional, Any, Dict from fastapi import APIRouter, HTTPException, BackgroundTasks from backend.shared.models import AutonomousResearchStartRequest, CritiqueRequest from backend.autonomous.core.autonomous_coordinator import 
autonomous_coordinator -from backend.autonomous.memory.research_metadata import research_metadata -from backend.autonomous.memory.brainstorm_memory import brainstorm_memory -from backend.autonomous.memory.paper_library import paper_library +from backend.autonomous.memory.research_metadata import research_metadata, ResearchMetadata +from backend.autonomous.memory.brainstorm_memory import brainstorm_memory, BrainstormMemory +from backend.autonomous.memory.paper_library import paper_library, PaperLibrary from backend.autonomous.memory.final_answer_memory import final_answer_memory from backend.autonomous.memory.session_manager import session_manager from backend.autonomous.memory.autonomous_api_logger import autonomous_api_logger +from backend.aggregator.core.coordinator import coordinator +from backend.compiler.core.compiler_coordinator import compiler_coordinator logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/auto-research", tags=["autonomous"]) +def _get_active_autonomous_session_id() -> str: + """Return the active autonomous session identifier, falling back to legacy mode.""" + return session_manager.session_id if session_manager.is_session_active else "legacy" + + +def _validate_history_session_id(session_id: str) -> None: + """Reject malformed history session identifiers before building any filesystem paths.""" + if not session_id: + raise HTTPException(status_code=400, detail="Session ID is required") + + if session_id == "legacy": + return + + if session_id in {".", ".."} or "/" in session_id or "\\" in session_id: + raise HTTPException(status_code=400, detail=f"Invalid session ID: {session_id}") + + +def _get_start_conflict() -> Optional[str]: + """Return a user-facing conflict message if another workflow is active.""" + autonomous_state = autonomous_coordinator.get_state() + if autonomous_state.is_running: + return "Autonomous research is already running" + + if coordinator.is_running: + return "Cannot start Autonomous Research while 
Aggregator is running. Stop Aggregator first." + + if compiler_coordinator.is_running: + return "Cannot start Autonomous Research while Compiler is running. Stop Compiler first." + + return None + + +def _resolve_history_session_paths(session_id: str) -> Dict[str, Path]: + """Resolve all session-specific paths needed for Stage 2 paper history operations.""" + from backend.shared.config import system_config + + _validate_history_session_id(session_id) + + if session_id == "legacy": + paths = { + "papers_dir": Path(system_config.auto_papers_dir), + "brainstorms_dir": Path(system_config.auto_brainstorms_dir), + "metadata_path": Path(system_config.auto_research_metadata_file), + "stats_path": Path(system_config.auto_research_stats_file), + "workflow_state_path": Path(system_config.auto_workflow_state_file), + } + else: + sessions_root = Path(system_config.auto_sessions_base_dir).resolve() + session_root = (sessions_root / session_id).resolve() + + if session_root.parent != sessions_root: + raise HTTPException(status_code=400, detail=f"Invalid session ID: {session_id}") + + if not session_root.exists(): + raise HTTPException(status_code=404, detail=f"Session not found: {session_id}") + + paths = { + "papers_dir": session_root / "papers", + "brainstorms_dir": session_root / "brainstorms", + "metadata_path": session_root / "session_metadata.json", + "stats_path": session_root / "session_stats.json", + "workflow_state_path": session_root / "workflow_state.json", + } + + if not paths["papers_dir"].exists(): + raise HTTPException( + status_code=404, + detail=f"No Stage 2 papers directory found for session: {session_id}" + ) + + return paths + + +def _build_scoped_paper_library(paths: Dict[str, Path]) -> PaperLibrary: + """Create a temporary PaperLibrary rooted at one legacy/session papers directory.""" + scoped_library = PaperLibrary() + scoped_library._base_dir = paths["papers_dir"] + scoped_library._archive_dir = paths["papers_dir"] / "archive" + return scoped_library + + 
+def _build_scoped_brainstorm_memory(paths: Dict[str, Path]) -> BrainstormMemory: + """Create a temporary BrainstormMemory rooted at one legacy/session brainstorms directory.""" + scoped_memory = BrainstormMemory() + scoped_memory._base_dir = paths["brainstorms_dir"] + return scoped_memory + + +async def _ensure_history_paper_is_visible( + scoped_paper_library: PaperLibrary, + *, + session_id: str, + paper_id: str, +) -> Any: + """Ensure a history paper matches the completed/non-archived contract of the history UI.""" + metadata = await scoped_paper_library.get_metadata(paper_id) + if not metadata or metadata.status != "complete": + raise HTTPException( + status_code=404, + detail=f"Paper not found in history: session={session_id}, paper={paper_id}" + ) + + if not await scoped_paper_library.is_paper_complete(paper_id): + raise HTTPException( + status_code=404, + detail=f"Paper is not available in history: session={session_id}, paper={paper_id}" + ) + + return metadata + + +async def _build_scoped_research_metadata(paths: Dict[str, Path]) -> ResearchMetadata: + """Create a temporary ResearchMetadata instance rooted at one legacy/session metadata set.""" + scoped_metadata = ResearchMetadata() + scoped_metadata._metadata_path = paths["metadata_path"] + scoped_metadata._stats_path = paths["stats_path"] + scoped_metadata._workflow_state_path = paths["workflow_state_path"] + await scoped_metadata.initialize() + return scoped_metadata + + +def _resolve_validator_config(request: Optional[CritiqueRequest]) -> Dict[str, Any]: + """Resolve critique validator settings from the request or the active coordinator.""" + validator_model = None + validator_context_window = None + validator_max_tokens = None + validator_provider = None + validator_openrouter_provider = None + custom_prompt = None + + if request: + custom_prompt = request.custom_prompt + if request.validator_model: + validator_model = request.validator_model + validator_context_window = 
request.validator_context_window or 131072 + validator_max_tokens = request.validator_max_tokens or 25000 + validator_provider = request.validator_provider or "lm_studio" + validator_openrouter_provider = request.validator_openrouter_provider + + if not validator_model: + coordinator_config = autonomous_coordinator.get_validator_config() + if coordinator_config: + validator_model = coordinator_config["validator_model"] + validator_context_window = coordinator_config["validator_context_window"] + validator_max_tokens = coordinator_config["validator_max_tokens"] + validator_provider = coordinator_config["validator_provider"] + validator_openrouter_provider = coordinator_config.get("validator_openrouter_provider") + + if not validator_model: + raise HTTPException( + status_code=400, + detail="No validator model configured. Please configure a validator model in Autonomous Research Settings." + ) + + return { + "custom_prompt": custom_prompt, + "validator_model": validator_model, + "validator_context_window": validator_context_window, + "validator_max_tokens": validator_max_tokens, + "validator_provider": validator_provider, + "validator_openrouter_provider": validator_openrouter_provider, + } + + +async def _generate_autonomous_paper_critique( + *, + paper_id: str, + paper_title: str, + content: str, + base_path: str, + request: Optional[CritiqueRequest] = None, +) -> Dict[str, Any]: + """Generate and persist a critique for an autonomous Stage 2 paper.""" + from backend.shared.critique_memory import save_critique + from backend.shared.critique_prompts import ( + DEFAULT_CRITIQUE_PROMPT, + build_critique_prompt, + parse_critique_response, + ) + from backend.shared.api_client_manager import api_client_manager + from backend.shared.models import ModelConfig, PaperCritique + from backend.shared.utils import count_tokens + from datetime import datetime + import uuid + + config = _resolve_validator_config(request) + prompt_to_use = config["custom_prompt"] or 
DEFAULT_CRITIQUE_PROMPT + full_prompt = build_critique_prompt(content, paper_title, prompt_to_use) + prompt_tokens = count_tokens(full_prompt) + + output_reserve = config["validator_max_tokens"] + safety_margin = int(config["validator_context_window"] * 0.1) + available_input = config["validator_context_window"] - output_reserve - safety_margin + + if prompt_tokens > available_input: + excess_tokens = prompt_tokens - available_input + raise HTTPException( + status_code=400, + detail=( + f"Paper is too long for the validator's context window. " + f"The paper requires {prompt_tokens:,} tokens, but the validator can only accept {available_input:,} tokens " + f"(context window: {config['validator_context_window']:,}, output reserve: {output_reserve:,}, safety margin: {safety_margin:,}). " + f"The paper exceeds the limit by {excess_tokens:,} tokens. " + f"A complete and honest review requires direct context injection - please select a validator with a larger context window." + ) + ) + + api_client_manager.configure_role( + "paper_critic", + ModelConfig( + provider=config["validator_provider"], + model_id=config["validator_model"], + openrouter_model_id=config["validator_model"] if config["validator_provider"] == "openrouter" else None, + openrouter_provider=config["validator_openrouter_provider"], + lm_studio_fallback_id=None, + context_window=config["validator_context_window"], + max_output_tokens=config["validator_max_tokens"], + ) + ) + + logger.info(f"Requesting critique for paper {paper_id} from validator model {config['validator_model']}") + + response = await api_client_manager.generate_completion( + task_id=f"paper_critique_{paper_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}", + role_id="paper_critic", + model=config["validator_model"], + messages=[{"role": "user", "content": full_prompt}], + max_tokens=config["validator_max_tokens"], + temperature=0.0, + ) + + response_content = "" + if response.get("choices"): + message = 
response["choices"][0].get("message", {}) + response_content = message.get("content") or message.get("reasoning") or "" + + if not response_content: + raise HTTPException(status_code=500, detail="Empty response from validator model") + + critique_data = parse_critique_response(response_content) + critique = PaperCritique( + critique_id=str(uuid.uuid4()), + model_id=config["validator_model"], + provider=config["validator_provider"], + host_provider=config["validator_openrouter_provider"], + date=datetime.now(), + prompt_used=prompt_to_use, + novelty_rating=critique_data.get("novelty_rating", 0), + novelty_feedback=critique_data.get("novelty_feedback", ""), + correctness_rating=critique_data.get("correctness_rating", 0), + correctness_feedback=critique_data.get("correctness_feedback", ""), + impact_rating=critique_data.get("impact_rating", 0), + impact_feedback=critique_data.get("impact_feedback", ""), + full_critique=critique_data.get("full_critique", ""), + ) + + saved_critique = await save_critique("autonomous_paper", critique, paper_id, base_path) + return { + "success": True, + "critique": saved_critique.model_dump(), + "paper_id": paper_id, + "paper_title": paper_title, + } + + +async def _get_autonomous_paper_critiques_response( + *, + paper_id: str, + paper_title: str, + base_path: str, +) -> Dict[str, Any]: + """Load critique history for an autonomous Stage 2 paper.""" + from backend.shared.critique_memory import get_critiques + + critiques = await get_critiques("autonomous_paper", paper_id, base_path) + return { + "success": True, + "paper_id": paper_id, + "paper_title": paper_title, + "critiques": [critique.model_dump() for critique in critiques], + "count": len(critiques), + } + + +async def _delete_autonomous_paper_from_scope( + *, + session_id: str, + scoped_paper_library: PaperLibrary, + scoped_brainstorm_memory: BrainstormMemory, + scoped_research_metadata: ResearchMetadata, + paper_id: str, +) -> Dict[str, Any]: + """Delete a Stage 2 paper and clean 
its related metadata/critique state.""" + from backend.shared.critique_memory import clear_critiques + + state = autonomous_coordinator.get_state() + active_session_id = _get_active_autonomous_session_id() + if ( + state.is_running + and state.current_tier == "tier2_paper_writing" + and autonomous_coordinator._current_paper_id == paper_id + and active_session_id == session_id + ): + raise HTTPException( + status_code=400, + detail="Cannot delete active paper while it's being compiled. Stop autonomous research first." + ) + + metadata = await scoped_paper_library.get_metadata(paper_id) + if not metadata: + raise HTTPException(status_code=404, detail=f"Paper not found: {paper_id}") + + paper_path = scoped_paper_library.get_paper_path(paper_id) + base_path = os.path.dirname(paper_path) + source_brainstorms = metadata.source_brainstorm_ids or [] + + success = await scoped_paper_library.delete_paper(paper_id) + if not success: + raise HTTPException( + status_code=500, + detail=f"Failed to delete paper files for {paper_id}" + ) + + await scoped_research_metadata.delete_paper(paper_id) + + for topic_id in source_brainstorms: + try: + await scoped_brainstorm_memory.remove_paper_reference(topic_id, paper_id) + except Exception as e: + logger.warning( + f"Failed to remove paper {paper_id} from brainstorm metadata {topic_id}: {e}" + ) + + try: + await clear_critiques("autonomous_paper", paper_id, base_path) + logger.info(f"Cleared critiques for deleted paper {paper_id}") + except Exception as e: + logger.warning(f"Failed to clear critiques for paper {paper_id}: {e}") + + logger.info( + f"Deleted paper {paper_id} from session {session_id} " + f"(from brainstorms: {', '.join(source_brainstorms)})" + ) + + return { + "success": True, + "message": f"Paper {paper_id} deleted successfully", + "paper_id": paper_id, + "session_id": session_id, + "source_brainstorms": source_brainstorms, + } + + @router.post("/start") async def start_autonomous_research( request: 
AutonomousResearchStartRequest, @@ -29,14 +386,10 @@ async def start_autonomous_research( """Start autonomous research mode.""" try: from backend.shared.config import system_config - - # Check if already running - state = autonomous_coordinator.get_state() - if state.is_running: - raise HTTPException( - status_code=400, - detail="Autonomous research is already running" - ) + + conflict = _get_start_conflict() + if conflict: + raise HTTPException(status_code=400, detail=conflict) # Validate submitter configs num_submitters = len(request.submitter_configs) @@ -420,6 +773,43 @@ async def get_paper(paper_id: str): raise HTTPException(status_code=500, detail=str(e)) +@router.get("/paper-history") +async def get_paper_history(): + """Get all completed, non-archived Stage 2 papers from legacy and session history.""" + try: + papers = await paper_library.list_history_papers() + return { + "success": True, + "papers": papers, + "total_count": len(papers) + } + except Exception as e: + logger.error(f"Failed to get Stage 2 paper history: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/paper-history/{session_id}/{paper_id}") +async def get_history_paper(session_id: str, paper_id: str): + """Get one completed, non-archived Stage 2 paper from legacy/session history.""" + try: + paper = await paper_library.get_history_paper(session_id, paper_id) + if not paper: + raise HTTPException( + status_code=404, + detail=f"Paper not found in history: session={session_id}, paper={paper_id}" + ) + + return { + "success": True, + **paper + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to get history paper {session_id}/{paper_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/current-paper-progress") async def get_current_paper_progress(): """Get current paper being compiled (if any). 
@@ -764,9 +1154,9 @@ async def force_tier3(mode: str = "complete_current"): # Get current paper info from compiler if available try: from backend.compiler.core.compiler_coordinator import compiler_coordinator - compiler_state = compiler_coordinator.get_state() - context_info["compiler_mode"] = compiler_state.get("current_mode", "unknown") - except: + compiler_state = await compiler_coordinator.get_status() + context_info["compiler_mode"] = compiler_state.current_mode or "unknown" + except Exception: pass # Get count of completed papers @@ -951,72 +1341,58 @@ async def delete_paper(paper_id: str, confirm: bool = False): Query params: confirm: Must be True to execute deletion (safety check) """ - import os - try: if not confirm: raise HTTPException( status_code=400, detail="Must confirm deletion with confirm=true" ) - - # Check if running - state = autonomous_coordinator.get_state() - if state.is_running and state.current_tier == "tier2_paper_writing": - # Check if this is the active paper - if autonomous_coordinator._current_paper_id == paper_id: - raise HTTPException( - status_code=400, - detail="Cannot delete active paper while it's being compiled. Stop autonomous research first." 
- ) - - # Get paper metadata - metadata = await paper_library.get_metadata(paper_id) - if not metadata: - raise HTTPException( - status_code=404, - detail=f"Paper not found: {paper_id}" - ) - - # Get session-aware base path for critique storage BEFORE deleting paper - paper_path = paper_library.get_paper_path(paper_id) - base_path = os.path.dirname(paper_path) - - # Get source brainstorms - source_brainstorms = metadata.source_brainstorm_ids or [] - - # Delete paper files - success = await paper_library.delete_paper(paper_id) - if not success: + + return await _delete_autonomous_paper_from_scope( + session_id=_get_active_autonomous_session_id(), + scoped_paper_library=paper_library, + scoped_brainstorm_memory=brainstorm_memory, + scoped_research_metadata=research_metadata, + paper_id=paper_id, + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to delete paper {paper_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.delete("/paper-history/{session_id}/{paper_id}") +async def delete_history_paper(session_id: str, paper_id: str, confirm: bool = False): + """Delete a completed Stage 2 history paper from a specific legacy/session scope.""" + try: + if not confirm: raise HTTPException( - status_code=500, - detail=f"Failed to delete paper files for {paper_id}" + status_code=400, + detail="Must confirm deletion with confirm=true" ) - - # Remove from central metadata - await research_metadata.delete_paper(paper_id) - - # Clear associated critiques using session-aware path - from backend.shared.critique_memory import clear_critiques - try: - await clear_critiques("autonomous_paper", paper_id, base_path) - logger.info(f"Cleared critiques for deleted paper {paper_id}") - except Exception as e: - logger.warning(f"Failed to clear critiques for paper {paper_id}: {e}") - - logger.info(f"Deleted paper {paper_id} (from brainstorms: {', '.join(source_brainstorms)})") - - return { - "success": True, - "message": f"Paper 
{paper_id} deleted successfully", - "paper_id": paper_id, - "source_brainstorms": source_brainstorms - } - + + paths = _resolve_history_session_paths(session_id) + scoped_paper_library = _build_scoped_paper_library(paths) + scoped_brainstorm_memory = _build_scoped_brainstorm_memory(paths) + scoped_research_metadata = await _build_scoped_research_metadata(paths) + await _ensure_history_paper_is_visible( + scoped_paper_library, + session_id=session_id, + paper_id=paper_id, + ) + + return await _delete_autonomous_paper_from_scope( + session_id=session_id, + scoped_paper_library=scoped_paper_library, + scoped_brainstorm_memory=scoped_brainstorm_memory, + scoped_research_metadata=scoped_research_metadata, + paper_id=paper_id, + ) except HTTPException: raise except Exception as e: - logger.error(f"Failed to delete paper {paper_id}: {e}") + logger.error(f"Failed to delete history paper {session_id}/{paper_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) @@ -1377,6 +1753,7 @@ async def get_final_answer_archived_papers(answer_id: str): List of paper metadata """ from backend.autonomous.memory.final_answer_memory import FinalAnswerMemory + from backend.shared.config import system_config from pathlib import Path try: @@ -1407,6 +1784,7 @@ async def get_final_answer_archived_paper(answer_id: str, paper_id: str): Paper content, abstract, outline, metadata """ from backend.autonomous.memory.final_answer_memory import FinalAnswerMemory + from backend.shared.config import system_config from pathlib import Path try: @@ -1441,6 +1819,7 @@ async def get_final_answer_archived_brainstorms(answer_id: str): List of brainstorm metadata """ from backend.autonomous.memory.final_answer_memory import FinalAnswerMemory + from backend.shared.config import system_config from pathlib import Path try: @@ -1471,6 +1850,7 @@ async def get_final_answer_archived_brainstorm(answer_id: str, topic_id: str): Brainstorm content and metadata """ from 
backend.autonomous.memory.final_answer_memory import FinalAnswerMemory + from backend.shared.config import system_config from pathlib import Path try: @@ -1516,180 +1896,25 @@ async def request_paper_critique(paper_id: str, request: CritiqueRequest = None) Returns: The critique with ratings and feedback """ - from backend.shared.config import system_config - from backend.shared.critique_prompts import build_critique_prompt, DEFAULT_CRITIQUE_PROMPT - from backend.shared.critique_memory import save_critique, MAX_CRITIQUES_PER_PAPER - from backend.shared.models import PaperCritique, CritiqueRequest - from backend.shared.api_client_manager import api_client_manager - from backend.shared.json_parser import parse_json - from backend.shared.utils import count_tokens - import os - import uuid - from datetime import datetime - try: - # Get paper content metadata = await paper_library.get_metadata(paper_id) if not metadata: raise HTTPException(status_code=404, detail=f"Paper not found: {paper_id}") - + content = await paper_library.get_paper_content(paper_id) if not content: raise HTTPException(status_code=404, detail=f"Paper content not found: {paper_id}") - - # Get session-aware base path for critique storage - # Critiques are stored alongside papers in the same directory + paper_path = paper_library.get_paper_path(paper_id) base_path = os.path.dirname(paper_path) - - # Try to get validator config from request body first (allows critiques without starting research) - # Then fall back to autonomous coordinator's stored config - validator_model = None - validator_context_window = None - validator_max_tokens = None - validator_provider = None - validator_openrouter_provider = None - custom_prompt = None - - if request: - custom_prompt = request.custom_prompt - # Check if request provides validator config - if request.validator_model: - validator_model = request.validator_model - validator_context_window = request.validator_context_window or 131072 - validator_max_tokens = 
request.validator_max_tokens or 25000 - validator_provider = request.validator_provider or "lm_studio" - validator_openrouter_provider = request.validator_openrouter_provider - - # If no validator config from request, try coordinator - if not validator_model: - coordinator_config = autonomous_coordinator.get_validator_config() - if coordinator_config: - validator_model = coordinator_config["validator_model"] - validator_context_window = coordinator_config["validator_context_window"] - validator_max_tokens = coordinator_config["validator_max_tokens"] - validator_provider = coordinator_config["validator_provider"] - validator_openrouter_provider = coordinator_config.get("validator_openrouter_provider") - - # If still no config, error - if not validator_model: - raise HTTPException( - status_code=400, - detail="No validator model configured. Please configure a validator model in Autonomous Research Settings." - ) - - # Build the critique prompt - prompt_to_use = custom_prompt if custom_prompt else DEFAULT_CRITIQUE_PROMPT - full_prompt = build_critique_prompt(content, metadata.title, prompt_to_use) - - # Count tokens in the prompt - prompt_tokens = count_tokens(full_prompt) - - # Calculate available input tokens (context window - output reserve - safety margin) - output_reserve = validator_max_tokens - safety_margin = int(validator_context_window * 0.1) # 10% safety margin - available_input = validator_context_window - output_reserve - safety_margin - - # Check if paper fits in context window - if prompt_tokens > available_input: - excess_tokens = prompt_tokens - available_input - raise HTTPException( - status_code=400, - detail=( - f"Paper is too long for the validator's context window. " - f"The paper requires {prompt_tokens:,} tokens, but the validator can only accept {available_input:,} tokens " - f"(context window: {validator_context_window:,}, output reserve: {output_reserve:,}, safety margin: {safety_margin:,}). 
" - f"The paper exceeds the limit by {excess_tokens:,} tokens. " - f"A complete and honest review requires direct context injection - please select a validator with a larger context window." - ) - ) - - # Build messages for API call - messages = [ - {"role": "user", "content": full_prompt} - ] - - # Configure the paper_critic role with the validator settings BEFORE making the API call - # This ensures routing goes to the correct provider (OpenRouter vs LM Studio) - from backend.shared.models import ModelConfig - - api_client_manager.configure_role( - "paper_critic", - ModelConfig( - provider=validator_provider, - model_id=validator_model, - openrouter_model_id=validator_model if validator_provider == "openrouter" else None, - openrouter_provider=validator_openrouter_provider, - lm_studio_fallback_id=None, # No fallback for direct critique calls - context_window=validator_context_window, - max_output_tokens=validator_max_tokens - ) - ) - - # Make the API call to the validator model - logger.info(f"Requesting critique for paper {paper_id} from validator model {validator_model}") - - response = await api_client_manager.generate_completion( - task_id=f"paper_critique_{paper_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}", - role_id="paper_critic", - model=validator_model, - messages=messages, - max_tokens=validator_max_tokens, - temperature=0.0 - ) - - # Parse the response - extract from OpenAI-compatible response structure - response_content = "" - if response.get("choices"): - message = response["choices"][0].get("message", {}) - response_content = message.get("content") or message.get("reasoning") or "" - - if not response_content: - raise HTTPException(status_code=500, detail="Empty response from validator model") - - # Try to parse as JSON - try: - critique_data = parse_json(response_content) - except Exception as e: - # If JSON parsing fails, create a structured response from raw text - logger.warning(f"Failed to parse critique JSON, using raw response: {e}") - 
critique_data = { - "novelty_rating": 0, - "novelty_feedback": "Unable to parse structured response", - "correctness_rating": 0, - "correctness_feedback": "Unable to parse structured response", - "impact_rating": 0, - "impact_feedback": "Unable to parse structured response", - "full_critique": response_content - } - - # Create critique object with correct field names - critique = PaperCritique( - critique_id=str(uuid.uuid4()), - model_id=validator_model, - provider=validator_provider, - host_provider=validator_openrouter_provider, - date=datetime.now(), - prompt_used=prompt_to_use, - novelty_rating=critique_data.get("novelty_rating", 0), - novelty_feedback=critique_data.get("novelty_feedback", ""), - correctness_rating=critique_data.get("correctness_rating", 0), - correctness_feedback=critique_data.get("correctness_feedback", ""), - impact_rating=critique_data.get("impact_rating", 0), - impact_feedback=critique_data.get("impact_feedback", ""), - full_critique=critique_data.get("full_critique", "") + + return await _generate_autonomous_paper_critique( + paper_id=paper_id, + paper_title=metadata.title, + content=content, + base_path=base_path, + request=request, ) - - # Save the critique with session-aware path - saved_critique = await save_critique("autonomous_paper", critique, paper_id, base_path) - - return { - "success": True, - "critique": saved_critique.model_dump(), - "paper_id": paper_id, - "paper_title": metadata.title - } - except HTTPException: raise except Exception as e: @@ -1708,29 +1933,19 @@ async def get_paper_critiques(paper_id: str): Returns: List of critiques for the paper """ - from backend.shared.critique_memory import get_critiques - import os - try: - # Verify paper exists metadata = await paper_library.get_metadata(paper_id) if not metadata: raise HTTPException(status_code=404, detail=f"Paper not found: {paper_id}") - - # Get session-aware base path for critique storage + paper_path = paper_library.get_paper_path(paper_id) base_path = 
os.path.dirname(paper_path) - - critiques = await get_critiques("autonomous_paper", paper_id, base_path) - - return { - "success": True, - "paper_id": paper_id, - "paper_title": metadata.title, - "critiques": [c.model_dump() for c in critiques], - "count": len(critiques) - } - + + return await _get_autonomous_paper_critiques_response( + paper_id=paper_id, + paper_title=metadata.title, + base_path=base_path, + ) except HTTPException: raise except Exception as e: @@ -1784,6 +1999,72 @@ async def delete_paper_critiques(paper_id: str, confirm: bool = False): raise HTTPException(status_code=500, detail=str(e)) +# ============================================================================ +# STAGE 2 PAPER HISTORY CRITIQUE ENDPOINTS +# ============================================================================ + + +@router.post("/paper-history/{session_id}/{paper_id}/critique") +async def request_history_paper_critique( + session_id: str, + paper_id: str, + request: CritiqueRequest = None, +): + """Request a validator critique for a Stage 2 history paper from a specific session.""" + try: + paths = _resolve_history_session_paths(session_id) + scoped_paper_library = _build_scoped_paper_library(paths) + metadata = await _ensure_history_paper_is_visible( + scoped_paper_library, + session_id=session_id, + paper_id=paper_id, + ) + + content = await scoped_paper_library.get_paper_content(paper_id) + if not content: + raise HTTPException( + status_code=404, + detail=f"Paper content not found: session={session_id}, paper={paper_id}" + ) + + return await _generate_autonomous_paper_critique( + paper_id=paper_id, + paper_title=metadata.title, + content=content, + base_path=str(paths["papers_dir"]), + request=request, + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to request history critique for {session_id}/{paper_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + 
+@router.get("/paper-history/{session_id}/{paper_id}/critiques") +async def get_history_paper_critiques(session_id: str, paper_id: str): + """Get all validator critiques for a Stage 2 history paper from a specific session.""" + try: + paths = _resolve_history_session_paths(session_id) + scoped_paper_library = _build_scoped_paper_library(paths) + metadata = await _ensure_history_paper_is_visible( + scoped_paper_library, + session_id=session_id, + paper_id=paper_id, + ) + + return await _get_autonomous_paper_critiques_response( + paper_id=paper_id, + paper_title=metadata.title, + base_path=str(paths["papers_dir"]), + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to get history critiques for {session_id}/{paper_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + # ============================================================================ # FINAL ANSWER CRITIQUE ENDPOINTS # ============================================================================ @@ -1812,7 +2093,6 @@ async def request_final_answer_critique(answer_id: str, request: CritiqueRequest from backend.shared.critique_memory import save_critique from backend.shared.models import PaperCritique, CritiqueRequest from backend.shared.api_client_manager import api_client_manager - from backend.shared.json_parser import parse_json from backend.shared.utils import count_tokens from pathlib import Path import uuid @@ -1944,20 +2224,9 @@ async def request_final_answer_critique(answer_id: str, request: CritiqueRequest if not response_content: raise HTTPException(status_code=500, detail="Empty response from validator model") - # Try to parse as JSON - try: - critique_data = parse_json(response_content) - except Exception as e: - logger.warning(f"Failed to parse critique JSON, using raw response: {e}") - critique_data = { - "novelty_rating": 0, - "novelty_feedback": "Unable to parse structured response", - "correctness_rating": 0, - "correctness_feedback": "Unable 
to parse structured response", - "impact_rating": 0, - "impact_feedback": "Unable to parse structured response", - "full_critique": response_content - } + # Parse with lenient fallback for truncated critique responses + from backend.shared.critique_prompts import parse_critique_response + critique_data = parse_critique_response(response_content) # Create critique object with correct field names critique = PaperCritique( diff --git a/backend/api/routes/boost.py b/backend/api/routes/boost.py index f82392d..ff0e9b7 100644 --- a/backend/api/routes/boost.py +++ b/backend/api/routes/boost.py @@ -13,6 +13,7 @@ from typing import Dict, Any, Optional import logging +from backend.shared.config import rag_config from backend.shared.models import BoostConfig from backend.shared.boost_manager import boost_manager from backend.shared.boost_logger import boost_logger @@ -27,33 +28,49 @@ class BoostNextCountRequest(BaseModel): count: int +def _resolve_boost_api_key(api_key: Optional[str]) -> str: + """Use the explicit boost key when provided, otherwise fall back to the active global key.""" + explicit_key = (api_key or "").strip() + if explicit_key: + return explicit_key + + global_key = (rag_config.openrouter_api_key or "").strip() + if global_key: + return global_key + + raise HTTPException( + status_code=400, + detail="No OpenRouter API key available. Use the active global key or provide one in the boost modal." + ) + + @router.post("/api/boost/enable") async def enable_boost(config: BoostConfig) -> Dict[str, Any]: """ Enable API boost with OpenRouter. 
Args: - config: Boost configuration with API key and model + config: Boost configuration with optional explicit API key and model Returns: Status and boost configuration """ try: - # Validate API key by testing connection - if not config.openrouter_api_key: - raise HTTPException(status_code=400, detail="OpenRouter API key is required") - - # Test connection - client = OpenRouterClient(config.openrouter_api_key) - models = await client.list_models() - - if not models: - raise HTTPException( - status_code=400, - detail="Failed to connect to OpenRouter. Please check your API key." - ) - - await client.close() + effective_api_key = _resolve_boost_api_key(config.openrouter_api_key) + + client = OpenRouterClient(effective_api_key) + try: + models = await client.list_models() + + if not models: + raise HTTPException( + status_code=400, + detail="Failed to connect to OpenRouter. Please check your API key." + ) + finally: + await client.close() + + config.openrouter_api_key = effective_api_key # Enable boost await boost_manager.set_boost_config(config) @@ -89,7 +106,7 @@ async def update_boost_model(config: BoostConfig) -> Dict[str, Any]: - boosted_task_ids Args: - config: New boost configuration with API key and model + config: New boost configuration with optional explicit API key and model Returns: Status and updated configuration @@ -102,21 +119,21 @@ async def update_boost_model(config: BoostConfig) -> Dict[str, Any]: detail="Boost must be enabled first. Use /api/boost/enable to enable boost." ) - # Validate API key by testing connection - if not config.openrouter_api_key: - raise HTTPException(status_code=400, detail="OpenRouter API key is required") - - # Test connection with new model - client = OpenRouterClient(config.openrouter_api_key) - models = await client.list_models() + effective_api_key = _resolve_boost_api_key(config.openrouter_api_key) - if not models: - raise HTTPException( - status_code=400, - detail="Failed to connect to OpenRouter. 
Please check your API key." - ) - - await client.close() + client = OpenRouterClient(effective_api_key) + try: + models = await client.list_models() + + if not models: + raise HTTPException( + status_code=400, + detail="Failed to connect to OpenRouter. Please check your API key." + ) + finally: + await client.close() + + config.openrouter_api_key = effective_api_key # Store current boost state before update old_boost_next_count = boost_manager.boost_next_count @@ -227,26 +244,26 @@ async def get_openrouter_models(authorization: Optional[str] = Header(None)) -> Fetch available OpenRouter models. Args: - authorization: OpenRouter API key via Authorization header (Bearer token) + authorization: Optional OpenRouter API key via Authorization header (Bearer token) Returns: List of available models """ try: - # Extract API key from Authorization header api_key = authorization.replace("Bearer ", "") if authorization and authorization.startswith("Bearer ") else authorization - - if not api_key: - raise HTTPException(status_code=400, detail="API key is required in Authorization header") - - client = OpenRouterClient(api_key) - models = await client.list_models() - await client.close() + + client = OpenRouterClient(_resolve_boost_api_key(api_key)) + try: + models = await client.list_models() + finally: + await client.close() return { "success": True, "models": models } + except HTTPException: + raise except Exception as e: logger.error(f"Failed to fetch OpenRouter models: {e}") raise HTTPException(status_code=500, detail=f"Failed to fetch models: {str(e)}") @@ -259,23 +276,22 @@ async def get_model_providers(model_id: str, authorization: Optional[str] = Head Args: model_id: The model ID to get providers for (query parameter) - authorization: OpenRouter API key via Authorization header (Bearer token) + authorization: Optional OpenRouter API key via Authorization header (Bearer token) Returns: List of available providers for the model """ try: - # Extract API key from 
Authorization header api_key = authorization.replace("Bearer ", "") if authorization and authorization.startswith("Bearer ") else authorization - if not api_key: - raise HTTPException(status_code=400, detail="API key is required in Authorization header") if not model_id: raise HTTPException(status_code=400, detail="Model ID is required") - client = OpenRouterClient(api_key) - providers = await client.get_model_providers(model_id) - await client.close() + client = OpenRouterClient(_resolve_boost_api_key(api_key)) + try: + providers = await client.get_model_providers(model_id) + finally: + await client.close() return { "success": True, diff --git a/backend/api/routes/compiler.py b/backend/api/routes/compiler.py index 8836f67..34cd872 100644 --- a/backend/api/routes/compiler.py +++ b/backend/api/routes/compiler.py @@ -8,19 +8,41 @@ from backend.shared.models import CompilerStartRequest, CompilerState, CritiqueRequest from backend.shared.config import system_config +from backend.shared.token_tracker import token_tracker from backend.compiler.core.compiler_coordinator import compiler_coordinator from backend.compiler.memory.outline_memory import outline_memory from backend.compiler.memory.paper_memory import paper_memory +from backend.aggregator.core.coordinator import coordinator +from backend.autonomous.core.autonomous_coordinator import autonomous_coordinator logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/compiler", tags=["compiler"]) +def _get_start_conflict() -> str | None: + """Return a user-facing conflict message if another workflow is active.""" + if compiler_coordinator.is_running: + return "Compiler is already running" + + if coordinator.is_running: + return "Cannot start Compiler while Aggregator is running. Stop Aggregator first." + + autonomous_state = autonomous_coordinator.get_state() + if autonomous_state.is_running: + return "Cannot start Compiler while Autonomous Research is running. Stop Autonomous Research first." 
+ + return None + + @router.post("/start") async def start_compiler(request: CompilerStartRequest): """Start the compiler system.""" try: + conflict = _get_start_conflict() + if conflict: + raise HTTPException(status_code=400, detail=conflict) + # Update system config with user-provided context sizes system_config.compiler_validator_context_window = request.validator_context_size system_config.compiler_high_context_context_window = request.high_context_context_size @@ -66,6 +88,8 @@ async def start_compiler(request: CompilerStartRequest): ) # Start coordinator + token_tracker.reset() + token_tracker.start_timer() await compiler_coordinator.start() return {"status": "started", "message": "Compiler started successfully"} @@ -116,6 +140,7 @@ async def stop_compiler(): """Stop the compiler system.""" try: await compiler_coordinator.stop() + token_tracker.stop_timer() return {"status": "stopped", "message": "Compiler stopped"} except Exception as e: logger.error(f"Failed to stop compiler: {e}") @@ -367,7 +392,7 @@ async def get_metrics(): "rejections": status.review_rejections, "declines": status.review_declines }, - "miniscule_edit_count": status.miniscule_edit_count, + "minuscule_edit_count": status.minuscule_edit_count, "paper_word_count": status.paper_word_count } except Exception as e: @@ -416,7 +441,7 @@ async def get_critique_status(): "in_critique_phase": compiler_coordinator.in_critique_phase, "critique_acceptances": compiler_coordinator.critique_acceptances, "paper_version": compiler_coordinator.paper_version, - "target_critiques": 10 + "target_critiques": 5 } except Exception as e: logger.error(f"Failed to get critique status: {e}") @@ -461,7 +486,6 @@ async def request_compiler_critique(critique_request: CritiqueRequest = None): from backend.shared.critique_memory import save_critique from backend.shared.models import PaperCritique from backend.shared.api_client_manager import api_client_manager - from backend.shared.json_parser import parse_json from 
backend.shared.utils import count_tokens import uuid from datetime import datetime @@ -489,13 +513,13 @@ async def request_compiler_critique(critique_request: CritiqueRequest = None): validator_provider = critique_request.validator_provider validator_openrouter_provider = critique_request.validator_openrouter_provider - # If validator config not provided in request, fall back to system config + # If validator config not provided in request, fall back to coordinator config if not validator_model: - validator_model = system_config.compiler_validator_model + validator_model = getattr(compiler_coordinator, 'validator_model', None) validator_context_window = system_config.compiler_validator_context_window validator_max_tokens = system_config.compiler_validator_max_output_tokens - validator_provider = getattr(system_config, 'compiler_validator_provider', 'lm_studio') - validator_openrouter_provider = getattr(system_config, 'compiler_validator_openrouter_provider', None) + validator_provider = getattr(compiler_coordinator, 'validator_provider', 'lm_studio') + validator_openrouter_provider = getattr(compiler_coordinator, 'validator_openrouter_provider', None) if not validator_model: raise HTTPException( @@ -579,21 +603,9 @@ async def request_compiler_critique(critique_request: CritiqueRequest = None): if not response_content: raise HTTPException(status_code=500, detail="Empty response from validator model") - # Try to parse as JSON - try: - critique_data = parse_json(response_content) - except Exception as e: - # If JSON parsing fails, create a structured response from raw text - logger.warning(f"Failed to parse critique JSON, using raw response: {e}") - critique_data = { - "novelty_rating": 0, - "novelty_feedback": "Unable to parse structured response", - "correctness_rating": 0, - "correctness_feedback": "Unable to parse structured response", - "impact_rating": 0, - "impact_feedback": "Unable to parse structured response", - "full_critique": response_content - } + # 
Parse with lenient fallback for truncated critique responses + from backend.shared.critique_prompts import parse_critique_response + critique_data = parse_critique_response(response_content) # Create critique object critique = PaperCritique( diff --git a/backend/api/routes/openrouter.py b/backend/api/routes/openrouter.py index b7d9cf9..4f6c586 100644 --- a/backend/api/routes/openrouter.py +++ b/backend/api/routes/openrouter.py @@ -7,7 +7,8 @@ - OpenRouter model listing (using stored API key) - Model provider listing -Note: This is separate from boost routes which use a separate API key for boost mode. +Note: Boost routes can reuse the active global key by default, while still allowing +an explicit boost-only override key when the user provides one. """ from fastapi import APIRouter, HTTPException, Header from pydantic import BaseModel @@ -73,6 +74,7 @@ async def set_api_key(request: SetApiKeyRequest) -> Dict[str, Any]: This key is stored in memory and used by the API client manager for roles configured to use OpenRouter. It's separate from the boost API key. + Also resets any credit exhaustion flags so roles can retry OpenRouter. Args: request: Request with api_key field @@ -102,12 +104,19 @@ async def set_api_key(request: SetApiKeyRequest) -> Dict[str, Any]: # Also configure the API client manager api_client_manager.set_openrouter_api_key(request.api_key) + # Reset exhaustion flags so roles can retry OpenRouter + free_model_manager.clear_account_exhaustion() + reset_roles = await api_client_manager.reset_openrouter_fallbacks() + logger.info(f"Global OpenRouter API key set successfully. 
{len(models)} models available.") + if reset_roles: + logger.info(f"Auto-reset {len(reset_roles)} role(s) back to OpenRouter after key update") return { "success": True, "message": "OpenRouter API key validated and saved", - "model_count": len(models) + "model_count": len(models), + "roles_reset": list(reset_roles.keys()) } finally: await client.close() @@ -355,3 +364,35 @@ async def test_connection(request: SetApiKeyRequest) -> Dict[str, Any]: "message": f"Failed to connect: {str(e)}" } + +@router.post("/api/openrouter/reset-exhaustion") +async def reset_credit_exhaustion() -> Dict[str, Any]: + """ + Reset all credit exhaustion flags and role fallback states. + + Call this after adding credits to OpenRouter so roles can retry + without restarting the research mode. + + Resets: + - Per-role permanent fallback states (roles that fell back to LM Studio) + - Account-wide free model exhaustion flag + + Returns: + Success status and list of roles that were reset + """ + try: + free_model_manager.clear_account_exhaustion() + reset_roles = await api_client_manager.reset_openrouter_fallbacks() + + roles_list = list(reset_roles.keys()) + logger.info(f"Credit exhaustion reset: {len(roles_list)} role(s) restored, account exhaustion flag cleared") + + return { + "success": True, + "message": f"Reset {len(roles_list)} role(s) back to OpenRouter" if roles_list else "Exhaustion flags cleared (no roles needed reset)", + "roles_reset": roles_list, + "account_exhaustion_cleared": True + } + except Exception as e: + logger.error(f"Failed to reset credit exhaustion: {e}") + raise HTTPException(status_code=500, detail=f"Failed to reset: {str(e)}") diff --git a/backend/api/routes/workflow.py b/backend/api/routes/workflow.py index 336e2e1..f10559c 100644 --- a/backend/api/routes/workflow.py +++ b/backend/api/routes/workflow.py @@ -101,3 +101,10 @@ async def get_workflow_history(limit: int = 50) -> Dict[str, Any]: logger.error(f"Failed to get workflow history: {e}") raise 
HTTPException(status_code=500, detail=f"Failed to get history: {str(e)}") + +@router.get("/api/token-stats") +async def get_token_stats() -> Dict[str, Any]: + """Return cumulative token usage stats and elapsed research time.""" + from backend.shared.token_tracker import token_tracker + return {"success": True, **token_tracker.get_stats()} + diff --git a/backend/autonomous/agents/final_answer/answer_format_selector.py b/backend/autonomous/agents/final_answer/answer_format_selector.py index 57a6288..aac0a38 100644 --- a/backend/autonomous/agents/final_answer/answer_format_selector.py +++ b/backend/autonomous/agents/final_answer/answer_format_selector.py @@ -6,6 +6,11 @@ - LONG FORM: A curated volume/collection of papers with introduction and conclusion CRITICAL: Operates ONLY on Tier 2 papers, NOT on Tier 1 brainstorm databases. + +NO RAG BY DESIGN: This agent makes a strategic format decision using only the certainty +assessment result and paper metadata summaries (titles/abstracts). Full paper content +is not needed to decide short-form vs long-form — that's a structural question about +the research landscape, not a content-deep analysis. """ import asyncio import json diff --git a/backend/autonomous/agents/final_answer/certainty_assessor.py b/backend/autonomous/agents/final_answer/certainty_assessor.py index 2498cc1..548ba18 100644 --- a/backend/autonomous/agents/final_answer/certainty_assessor.py +++ b/backend/autonomous/agents/final_answer/certainty_assessor.py @@ -7,7 +7,10 @@ 2. Review full content and assess certainties CRITICAL: Operates ONLY on Tier 2 papers, NOT on Tier 1 brainstorm databases. -This ensures the final answer is based on validated, complete research. + +NO RAG FOR ABSTRACTS (by design): Step 1 browses abstracts/outlines which are small metadata. +EXPANDED PAPERS OVERFLOW: Step 2 currently drops expanded papers if they don't fit. +TODO: Should RAG expanded papers instead of dropping — see audit note in rag-design rule. 
""" import asyncio import json @@ -28,6 +31,7 @@ ) from backend.autonomous.memory.paper_library import paper_library from backend.autonomous.memory.final_answer_memory import final_answer_memory +from backend.autonomous.core.autonomous_rag_manager import autonomous_rag_manager from backend.autonomous.prompts.paper_reference_prompts import ( get_reference_expansion_system_prompt, get_reference_expansion_json_schema @@ -333,15 +337,56 @@ async def _generate_assessment( max_input = self._calculate_max_input_tokens() if prompt_tokens > max_input: - logger.error(f"CertaintyAssessor: Assessment prompt too large ({prompt_tokens} > {max_input})") - # Try without expanded papers - prompt = build_certainty_assessment_prompt( - user_research_prompt=user_research_prompt, - papers_summary=all_papers, - expanded_papers=None, - rejection_context=rejection_context - ) - prompt_tokens = count_tokens(prompt) + if expanded_papers: + # RAG the expanded papers instead of dropping them entirely + base_prompt = build_certainty_assessment_prompt( + user_research_prompt=user_research_prompt, + papers_summary=all_papers, + expanded_papers=None, + rejection_context=rejection_context + ) + mandatory_tokens = count_tokens(base_prompt) + paper_budget = max_input - mandatory_tokens - 500 + + if paper_budget > 2000: + logger.info(f"CertaintyAssessor: RAG fallback for expanded papers (budget={paper_budget}t)") + paper_ids = [p["paper_id"] for p in expanded_papers] + rag_content, _ = await autonomous_rag_manager.get_reference_papers_context( + paper_ids, + max_total_tokens=paper_budget, + query=user_research_prompt + ) + + if rag_content: + rag_papers = [{ + "paper_id": "rag_retrieved", + "title": f"RAG-retrieved content from {len(expanded_papers)} papers", + "content": rag_content + }] + prompt = build_certainty_assessment_prompt( + user_research_prompt=user_research_prompt, + papers_summary=all_papers, + expanded_papers=rag_papers, + rejection_context=rejection_context + ) + prompt_tokens = 
count_tokens(prompt) + else: + logger.warning("CertaintyAssessor: RAG returned empty, falling back to abstracts-only") + prompt = base_prompt + prompt_tokens = mandatory_tokens + else: + logger.warning("CertaintyAssessor: Insufficient budget for RAG, using abstracts-only") + prompt = base_prompt + prompt_tokens = mandatory_tokens + else: + prompt = build_certainty_assessment_prompt( + user_research_prompt=user_research_prompt, + papers_summary=all_papers, + expanded_papers=None, + rejection_context=rejection_context + ) + prompt_tokens = count_tokens(prompt) + if prompt_tokens > max_input: logger.error("CertaintyAssessor: Cannot fit even summary-only prompt") return None diff --git a/backend/autonomous/agents/final_answer/volume_organizer.py b/backend/autonomous/agents/final_answer/volume_organizer.py index 40d2376..5f5b2d1 100644 --- a/backend/autonomous/agents/final_answer/volume_organizer.py +++ b/backend/autonomous/agents/final_answer/volume_organizer.py @@ -8,6 +8,11 @@ - Iteratively refines until validator agrees CRITICAL: Operates ONLY on Tier 2 papers, NOT on Tier 1 brainstorm databases. + +NO RAG BY DESIGN: This agent organizes chapter order and identifies structural gaps +using only paper metadata summaries (titles/abstracts/outlines) and the certainty +assessment. Full paper content is not needed to plan volume structure — that's a +high-level organizational decision based on what each paper covers. """ import asyncio import json diff --git a/backend/autonomous/agents/paper_title_selector.py b/backend/autonomous/agents/paper_title_selector.py index f0b0bc9..e58df59 100644 --- a/backend/autonomous/agents/paper_title_selector.py +++ b/backend/autonomous/agents/paper_title_selector.py @@ -1,5 +1,10 @@ """ Paper Title Selector Agent - Selects titles for papers. + +NO RAG BY DESIGN: This agent selects a title based on brainstorm SUMMARY (not full DB), +existing paper titles/abstracts from this brainstorm, and reference paper metadata. 
+All inputs are compact summaries that fit in direct injection. The full brainstorm +content is not needed — a summary is sufficient to choose an appropriate title. """ import asyncio import json @@ -58,6 +63,7 @@ async def select_title( brainstorm_summary: str, existing_papers_from_brainstorm: List[Dict[str, Any]], reference_papers: List[Dict[str, Any]] = None, + candidate_titles: str = "", stop_event: Optional[asyncio.Event] = None ) -> Optional[str]: """ @@ -67,6 +73,7 @@ async def select_title( so the model can correct its mistakes. Args: + candidate_titles: Pre-validated candidate titles from exploration phase. stop_event: If provided, the loop exits when the event is set (user stop). Returns: @@ -100,7 +107,8 @@ async def select_title( brainstorm_summary, existing_papers_from_brainstorm, reference_papers, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + candidate_titles=candidate_titles ) if selection is None: @@ -137,7 +145,8 @@ async def _generate_title( brainstorm_summary: str, existing_papers_from_brainstorm: List[Dict[str, Any]], reference_papers: List[Dict[str, Any]] = None, - rejection_feedback: str = "" + rejection_feedback: str = "", + candidate_titles: str = "" ) -> Optional[PaperTitleSelection]: """Generate a paper title selection.""" try: @@ -150,7 +159,8 @@ async def _generate_title( brainstorm_summary=brainstorm_summary, existing_papers_from_brainstorm=existing_papers_from_brainstorm, reference_papers=reference_papers, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + candidate_titles=candidate_titles ) # If prompt is too large, shed oldest rejection entries one at a time until it fits @@ -165,7 +175,8 @@ async def _generate_title( brainstorm_summary=brainstorm_summary, existing_papers_from_brainstorm=existing_papers_from_brainstorm, reference_papers=reference_papers, - rejection_feedback=trimmed_feedback + rejection_feedback=trimmed_feedback, + candidate_titles=candidate_titles 
) if count_tokens(prompt) > max_input_tokens: logger.warning( @@ -178,9 +189,47 @@ async def _generate_title( brainstorm_summary=brainstorm_summary, existing_papers_from_brainstorm=existing_papers_from_brainstorm, reference_papers=reference_papers, - rejection_feedback="" + rejection_feedback="", + candidate_titles=candidate_titles ) + # Progressive truncation if still too large after shedding rejection feedback + if count_tokens(prompt) > max_input_tokens: + logger.warning("PaperTitleSelector: Truncating existing paper outlines/abstracts to fit") + truncated_existing = [] + for p in existing_papers_from_brainstorm: + tp = p.copy() + if tp.get("outline"): + tp["outline"] = "" + if tp.get("abstract") and len(tp["abstract"]) > 200: + tp["abstract"] = tp["abstract"][:200] + "..." + truncated_existing.append(tp) + prompt = build_paper_title_prompt( + user_research_prompt=user_research_prompt, + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + existing_papers_from_brainstorm=truncated_existing, + reference_papers=reference_papers, + rejection_feedback="", + candidate_titles=candidate_titles + ) + + if count_tokens(prompt) > max_input_tokens: + logger.warning("PaperTitleSelector: Truncating brainstorm summary to fit") + prompt = build_paper_title_prompt( + user_research_prompt=user_research_prompt, + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary[:2000] + "\n... 
[truncated for context fit]", + existing_papers_from_brainstorm=truncated_existing, + reference_papers=reference_papers, + rejection_feedback="", + candidate_titles=candidate_titles + ) + + if count_tokens(prompt) > max_input_tokens: + logger.error("PaperTitleSelector: Cannot fit prompt even after all truncation") + return None + # Generate task ID for tracking task_id = self.get_current_task_id() self.task_sequence += 1 diff --git a/backend/autonomous/agents/topic_selector.py b/backend/autonomous/agents/topic_selector.py index dfdc3db..d4126e5 100644 --- a/backend/autonomous/agents/topic_selector.py +++ b/backend/autonomous/agents/topic_selector.py @@ -5,6 +5,11 @@ - Uses DIRECT INJECTION for all context (metadata summaries are typically small) - Validates prompt size before sending to prevent context overflow - Truncates paper abstracts if context is too large (safe since abstracts are summaries) + +NO RAG BY DESIGN: This agent makes a strategic decision about WHAT to work on next. +It only needs metadata summaries (topic prompts, statuses, paper titles/abstracts), +not full brainstorm databases or full paper content. Metadata is small enough to +direct-inject; abstract truncation is the overflow fallback. """ import asyncio import json @@ -68,7 +73,8 @@ async def select_topic( self, user_research_prompt: str, brainstorms_summary: List[Dict[str, Any]], - papers_summary: List[Dict[str, Any]] + papers_summary: List[Dict[str, Any]], + candidate_questions: str = "" ) -> Optional[TopicSelectionSubmission]: """ Generate a topic selection submission. 
@@ -77,6 +83,7 @@ async def select_topic( user_research_prompt: The user's high-level research goal brainstorms_summary: List of all brainstorms with metadata papers_summary: List of all papers with title, abstract, word count + candidate_questions: Formatted candidate questions from topic exploration phase Returns: TopicSelectionSubmission or None if generation failed @@ -90,7 +97,8 @@ async def select_topic( user_research_prompt=user_research_prompt, brainstorms_summary=brainstorms_summary, papers_summary=papers_summary, - rejection_context=rejection_context + rejection_context=rejection_context, + candidate_questions=candidate_questions ) # Validate prompt size @@ -115,7 +123,8 @@ async def select_topic( user_research_prompt=user_research_prompt, brainstorms_summary=brainstorms_summary, papers_summary=truncated_papers, - rejection_context=rejection_context + rejection_context=rejection_context, + candidate_questions=candidate_questions ) prompt_tokens = count_tokens(prompt) diff --git a/backend/autonomous/agents/topic_validator.py b/backend/autonomous/agents/topic_validator.py index df7a4b4..23632f2 100644 --- a/backend/autonomous/agents/topic_validator.py +++ b/backend/autonomous/agents/topic_validator.py @@ -5,6 +5,10 @@ - Uses same context as topic selector (metadata summaries) - Validates prompt size before sending - Truncates paper abstracts if context is too large + +NO RAG BY DESIGN: Same rationale as topic selector — validates a strategic decision +using only metadata summaries (topic prompts, statuses, paper titles/abstracts). +Full content not needed for validating topic selection quality. """ import asyncio import json @@ -65,7 +69,8 @@ async def validate( submission: TopicSelectionSubmission, user_research_prompt: str, brainstorms_summary: List[Dict[str, Any]], - papers_summary: List[Dict[str, Any]] + papers_summary: List[Dict[str, Any]], + override_prompt: Optional[str] = None ) -> TopicValidationResult: """ Validate a topic selection submission. 
@@ -75,34 +80,41 @@ async def validate( user_research_prompt: The user's high-level research goal brainstorms_summary: List of all brainstorms with metadata papers_summary: List of all papers with title, abstract, word count + override_prompt: If provided, use this prompt instead of building one Returns: TopicValidationResult with accept/reject decision """ try: - # Convert submission to dict for prompt - proposed_action = { - "action": submission.action, - "topic_id": submission.topic_id, - "topic_ids": submission.topic_ids, - "topic_prompt": submission.topic_prompt, - "reasoning": submission.reasoning - } - - # Build prompt - prompt = build_topic_validation_prompt( - user_research_prompt=user_research_prompt, - brainstorms_summary=brainstorms_summary, - papers_summary=papers_summary, - proposed_action=proposed_action - ) + if override_prompt: + prompt = override_prompt + else: + # Convert submission to dict for prompt + proposed_action = { + "action": submission.action, + "topic_id": submission.topic_id, + "topic_ids": submission.topic_ids, + "topic_prompt": submission.topic_prompt, + "reasoning": submission.reasoning + } + + # Build prompt + prompt = build_topic_validation_prompt( + user_research_prompt=user_research_prompt, + brainstorms_summary=brainstorms_summary, + papers_summary=papers_summary, + proposed_action=proposed_action + ) # Validate prompt size prompt_tokens = count_tokens(prompt) max_input_tokens = self._calculate_max_input_tokens() if prompt_tokens > max_input_tokens: - # Context too large - truncate paper abstracts to fit + if override_prompt: + logger.error(f"TopicValidator: Override prompt ({prompt_tokens} tokens) exceeds limit ({max_input_tokens}). Cannot truncate.") + return self._create_rejection("Override prompt too large for validation") + logger.warning(f"TopicValidator: Prompt ({prompt_tokens} tokens) exceeds limit ({max_input_tokens}). 
" f"Truncating paper abstracts.") diff --git a/backend/autonomous/core/autonomous_coordinator.py b/backend/autonomous/core/autonomous_coordinator.py index 2a26d76..1319faa 100644 --- a/backend/autonomous/core/autonomous_coordinator.py +++ b/backend/autonomous/core/autonomous_coordinator.py @@ -1,6 +1,6 @@ """ Autonomous Coordinator - Main orchestrator for autonomous research mode. -Manages the two-tier workflow: brainstorm aggregation -> paper compilation. +Manages the Tier 1 -> Tier 2 -> Tier 3 autonomous workflow. """ import asyncio import logging @@ -25,6 +25,7 @@ from backend.shared.api_client_manager import api_client_manager from backend.shared.openrouter_client import FreeModelExhaustedError from backend.shared.workflow_predictor import workflow_predictor +from backend.shared.token_tracker import token_tracker # Memory managers from backend.autonomous.memory.brainstorm_memory import brainstorm_memory @@ -139,6 +140,11 @@ def __init__(self): self._manual_paper_writing_triggered: bool = False self._resume_paper_phase: Optional[str] = None # Saved phase for resume (body/conclusion/intro/abstract) + # Brainstorm multi-paper continuation tracking + self._brainstorm_paper_count: int = 0 # Papers written from current brainstorm (max 3) + self._current_brainstorm_paper_ids: List[str] = [] # Paper IDs from current brainstorm cycle + self._last_completed_paper_id: Optional[str] = None # Persists after _current_paper_id is cleared + # Tier 3 Final Answer tracking self._last_tier3_check_at: int = 0 # Paper count at last Tier 3 check self._tier3_active: bool = False # Is Tier 3 final answer generation active @@ -561,6 +567,10 @@ async def _check_resume_state(self) -> None: self._last_completion_review_at = workflow_state.get("last_completion_review_at", 0) self._last_tier3_check_at = workflow_state.get("last_tier3_check_at", 0) + # Restore brainstorm multi-paper continuation tracking + self._brainstorm_paper_count = workflow_state.get("brainstorm_paper_count", 0) + 
self._current_brainstorm_paper_ids = workflow_state.get("current_brainstorm_paper_ids", []) + # Restore Tier 3 flags for proper resume self._tier3_active = workflow_state.get("tier3_active", False) self._tier3_enabled = workflow_state.get("tier3_enabled", False) @@ -812,6 +822,9 @@ async def _save_workflow_state(self, tier: str = None, phase: str = None) -> Non "last_redundancy_check_at": self._last_redundancy_check_at, "last_completion_review_at": self._last_completion_review_at, "last_tier3_check_at": self._last_tier3_check_at, + # Brainstorm multi-paper continuation tracking + "brainstorm_paper_count": self._brainstorm_paper_count, + "current_brainstorm_paper_ids": self._current_brainstorm_paper_ids, # Tier 3 Final Answer crash recovery fields "tier3_active": self._tier3_active, "tier3_enabled": self._tier3_enabled, @@ -866,6 +879,10 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, api_client_manager.set_autonomous_logger_callback(log_callback) logger.info("Autonomous API logging enabled") + # Reset and start token tracking for this session + token_tracker.reset() + token_tracker.start_timer() + # Refresh workflow predictions at start await self.refresh_workflow_predictions() @@ -924,6 +941,110 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, if await self._paper_compilation_workflow(): break + if not self._stop_event.is_set(): + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + # Continuation loop for resumed tier2 paper + while (self._brainstorm_paper_count < 3 + and not self._stop_event.is_set()): + cont_decision = await self._brainstorm_continuation_decision() + if cont_decision != "write_another_paper": + break + logger.info(f"Writing paper {self._brainstorm_paper_count + 1}/3 from resumed brainstorm {self._current_topic_id}") + self._current_paper_tracker = 
PaperModelTracker( + user_prompt=self._user_research_prompt, + paper_title="" + ) + next_ok = False + while not self._stop_event.is_set(): + next_ok = await self._paper_compilation_workflow(skip_reference_selection=True) + if next_ok or self._stop_event.is_set(): + break + await asyncio.sleep(5) + if not next_ok or self._stop_event.is_set(): + break + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None + + continue + elif resume_tier == "tier1_aggregation" and not resume_topic and resume_state.get("paper_phase") == "topic_exploration": + # Resume topic exploration phase (no topic selected yet) + # Exploration restarts fresh — uses aggregator which will run from scratch + logger.info("Resuming topic exploration phase (restarting fresh)") + resume_state = None + self._resume_paper_phase = None + + candidate_questions = await self._topic_exploration_phase() + + if self._stop_event.is_set(): + break + + topic_result = await self._topic_selection_loop(candidate_questions) + + if self._stop_event.is_set(): + break + + self._current_reference_papers = await self._pre_brainstorm_reference_selection() + + if self._stop_event.is_set(): + break + + await self._save_workflow_state(tier="tier1_aggregation") + + write_paper = await self._brainstorm_aggregation_loop() + + if self._stop_event.is_set(): + break + + if write_paper: + while not self._stop_event.is_set(): + if await self._paper_compilation_workflow(): + break + await asyncio.sleep(5) + + if self._stop_event.is_set(): + break + + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + while (self._brainstorm_paper_count < 3 + and not 
self._stop_event.is_set()): + cont_decision = await self._brainstorm_continuation_decision() + if cont_decision != "write_another_paper": + break + self._current_paper_tracker = PaperModelTracker( + user_prompt=self._user_research_prompt, + paper_title="" + ) + next_ok = False + while not self._stop_event.is_set(): + next_ok = await self._paper_compilation_workflow(skip_reference_selection=True) + if next_ok or self._stop_event.is_set(): + break + await asyncio.sleep(5) + if not next_ok or self._stop_event.is_set(): + break + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None + continue elif resume_tier == "tier1_aggregation" and resume_topic: # Resume brainstorm aggregation @@ -960,7 +1081,38 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, if self._stop_event.is_set(): break + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) await self._check_paper_redundancy() + + # Continuation loop for resumed brainstorm + while (self._brainstorm_paper_count < 3 + and not self._stop_event.is_set()): + cont_decision = await self._brainstorm_continuation_decision() + if cont_decision != "write_another_paper": + break + logger.info(f"Writing paper {self._brainstorm_paper_count + 1}/3 from resumed brainstorm {self._current_topic_id}") + self._current_paper_tracker = PaperModelTracker( + user_prompt=self._user_research_prompt, + paper_title="" + ) + next_ok = False + while not self._stop_event.is_set(): + next_ok = await self._paper_compilation_workflow(skip_reference_selection=True) + if next_ok or self._stop_event.is_set(): + break + await asyncio.sleep(5) + if not next_ok or self._stop_event.is_set(): + break 
+ self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None continue elif resume_tier == "tier3_final_answer": @@ -1045,16 +1197,17 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, else: logger.warning("Cannot run forced Tier 3: no completed papers") - # Phase 1: Topic selection - topic_result = await self._topic_selection_loop() + # Phase 0: Topic Exploration (mini-brainstorm of candidate questions) + candidate_questions = await self._topic_exploration_phase() if self._stop_event.is_set(): break - if not topic_result: - logger.error("Topic selection failed, retrying in 30 seconds") - await asyncio.sleep(30) - continue + # Phase 1: Topic selection (informed by exploration candidates) + topic_result = await self._topic_selection_loop(candidate_questions) + + if self._stop_event.is_set(): + break # Phase 1.5: Pre-brainstorm reference paper selection # This enables compounding knowledge across research cycles @@ -1103,16 +1256,63 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, # Only check redundancy and log completion if paper was successful if paper_success: - # Check for paper redundancy (every 3 papers) + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() - # Check for Tier 3 final answer trigger (every 5 papers) + # Brainstorm multi-paper continuation loop (max 3 papers per brainstorm) + while (self._brainstorm_paper_count < 3 + and not self._stop_event.is_set()): + decision = await self._brainstorm_continuation_decision() + if decision != "write_another_paper": + break + + logger.info(f"Writing paper {self._brainstorm_paper_count + 
1}/3 from brainstorm {self._current_topic_id}") + self._current_paper_tracker = PaperModelTracker( + user_prompt=self._user_research_prompt, + paper_title="" + ) + next_paper_success = False + _next_attempt = 0 + while not self._stop_event.is_set(): + _next_attempt += 1 + if _next_attempt > 1: + await asyncio.sleep(5) + next_paper_success = await self._paper_compilation_workflow( + skip_reference_selection=True + ) + if next_paper_success or self._stop_event.is_set(): + break + + if not next_paper_success or self._stop_event.is_set(): + break + + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + if self._brainstorm_paper_count >= 3: + logger.info("Brainstorm paper limit reached (3/3)") + await self._broadcast("brainstorm_paper_limit_reached", { + "topic_id": self._current_topic_id, + "paper_count": self._brainstorm_paper_count + }) + + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None + + if self._stop_event.is_set(): + break + if await self._should_trigger_tier3(): logger.info("Tier 3 trigger: Attempting final answer generation") completed = await self._tier3_final_answer_workflow() if completed: - # System stops after final answer is complete logger.info("FINAL ANSWER COMPLETE - Autonomous research finished") await self._broadcast("final_answer_complete", { "format": final_answer_memory.get_answer_format(), @@ -1120,10 +1320,9 @@ async def log_callback(task_id, role_id, model, provider, prompt, response, }) break else: - # Tier 3 decided we need more research - continue logger.info("Tier 3: More research needed, returning to topic selection") - logger.info("Paper complete, returning to topic selection") + logger.info("Brainstorm cycle complete, returning to topic selection") except FreeModelExhaustedError as e: if e.soonest_retry: @@ -1156,6 +1355,7 @@ async def 
log_callback(task_id, role_id, model, provider, prompt, response, finally: self._running = False self._state.is_running = False + token_tracker.stop_timer() stats = await research_metadata.get_stats() await self._broadcast("auto_research_stopped", { @@ -1197,6 +1397,7 @@ async def stop(self) -> None: # Clear autonomous API logging callback api_client_manager.set_autonomous_logger_callback(None) + token_tracker.stop_timer() logger.info("Autonomous API logging disabled") # SAVE workflow state for resume (NOT clear it) @@ -1383,16 +1584,17 @@ async def _resume_research_loop_after_tier3(self) -> None: else: logger.warning("Cannot run forced Tier 3: no completed papers") - # Phase 1: Topic selection - topic_result = await self._topic_selection_loop() + # Phase 0: Topic Exploration (mini-brainstorm of candidate questions) + candidate_questions = await self._topic_exploration_phase() if self._stop_event.is_set(): break - if not topic_result: - logger.error("Topic selection failed, retrying in 30 seconds") - await asyncio.sleep(30) - continue + # Phase 1: Topic selection (informed by exploration candidates) + topic_result = await self._topic_selection_loop(candidate_questions) + + if self._stop_event.is_set(): + break # Phase 1.5: Pre-brainstorm reference paper selection self._current_reference_papers = await self._pre_brainstorm_reference_selection() @@ -1440,16 +1642,63 @@ async def _resume_research_loop_after_tier3(self) -> None: # Only check redundancy and log completion if paper was successful if paper_success: - # Check for paper redundancy (every 3 papers) + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() - # Check for Tier 3 final answer trigger (every 5 papers) + # Brainstorm multi-paper continuation loop (max 3 papers per brainstorm) + while (self._brainstorm_paper_count < 3 + and not self._stop_event.is_set()): + decision = 
await self._brainstorm_continuation_decision() + if decision != "write_another_paper": + break + + logger.info(f"Writing paper {self._brainstorm_paper_count + 1}/3 from brainstorm {self._current_topic_id}") + self._current_paper_tracker = PaperModelTracker( + user_prompt=self._user_research_prompt, + paper_title="" + ) + next_paper_success = False + _next_attempt = 0 + while not self._stop_event.is_set(): + _next_attempt += 1 + if _next_attempt > 1: + await asyncio.sleep(5) + next_paper_success = await self._paper_compilation_workflow( + skip_reference_selection=True + ) + if next_paper_success or self._stop_event.is_set(): + break + + if not next_paper_success or self._stop_event.is_set(): + break + + self._brainstorm_paper_count += 1 + if self._last_completed_paper_id: + self._current_brainstorm_paper_ids.append(self._last_completed_paper_id) + await self._check_paper_redundancy() + + if self._brainstorm_paper_count >= 3: + logger.info("Brainstorm paper limit reached (3/3)") + await self._broadcast("brainstorm_paper_limit_reached", { + "topic_id": self._current_topic_id, + "paper_count": self._brainstorm_paper_count + }) + + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None + + if self._stop_event.is_set(): + break + if await self._should_trigger_tier3(): logger.info("Tier 3 trigger: Attempting final answer generation") completed = await self._tier3_final_answer_workflow() if completed: - # System stops after final answer is complete logger.info("FINAL ANSWER COMPLETE - Autonomous research finished") await self._broadcast("final_answer_complete", { "format": final_answer_memory.get_answer_format(), @@ -1457,10 +1706,9 @@ async def _resume_research_loop_after_tier3(self) -> None: }) break else: - # Tier 3 decided we need more research - continue logger.info("Tier 3: More research needed, returning to topic selection") - logger.info("Paper complete, returning to topic selection") + 
logger.info("Brainstorm cycle complete, returning to topic selection") except FreeModelExhaustedError as e: if e.soonest_retry: @@ -1492,6 +1740,7 @@ async def _resume_research_loop_after_tier3(self) -> None: finally: self._running = False self._state.is_running = False + token_tracker.stop_timer() shared_training_memory.insights.clear() shared_training_memory.submission_count = 0 @@ -1546,42 +1795,224 @@ async def skip_critique_phase(self) -> bool: return await self._paper_compiler.skip_critique_phase() + # ======================================================================== + # PHASE 0: TOPIC EXPLORATION (Pre-Selection Candidate Brainstorm) + # ======================================================================== + + async def _topic_exploration_phase(self) -> str: + """ + Topic exploration phase using the full Part 1 aggregator infrastructure. + All configured submitters run in parallel, batch validation up to 3 at a time. + Collects 5 accepted candidate brainstorm questions before topic selection. + + Returns: + Formatted candidate questions DB for injection into topic selection prompt. 
+ """ + api_client_manager.set_autonomous_phase("topic_exploration") + self._state.current_tier = "tier1_aggregation" + + TARGET_CANDIDATES = 5 + MAX_CONSECUTIVE_REJECTIONS = 15 + + await self._broadcast("topic_exploration_started", { + "target": TARGET_CANDIDATES, + "resumed_count": 0 + }) + + logger.info(f"Starting topic exploration phase (target: {TARGET_CANDIDATES} candidates)") + + # Build the exploration user prompt for the aggregator + from backend.autonomous.prompts.topic_exploration_prompts import build_exploration_user_prompt + + brainstorms_summary = await autonomous_rag_manager.get_all_brainstorms_summary() + papers_summary = await autonomous_rag_manager.get_all_papers_summary() + + exploration_prompt = build_exploration_user_prompt( + user_research_prompt=self._user_research_prompt, + brainstorms_summary=brainstorms_summary, + papers_summary=papers_summary + ) + + # Create a temp exploration database file in the brainstorms directory + exploration_db_path = brainstorm_memory._base_dir / "exploration_candidates.txt" + exploration_db_path.parent.mkdir(parents=True, exist_ok=True) + + # Clear any stale exploration DB + if exploration_db_path.exists(): + exploration_db_path.unlink() + + # Override shared training memory path for exploration + original_shared_path = system_config.shared_training_file + system_config.shared_training_file = str(exploration_db_path) + original_memory_path = shared_training_memory.file_path + shared_training_memory.file_path = exploration_db_path + await shared_training_memory.reload_insights_from_current_path() + + exploration_aggregator = None + + try: + exploration_aggregator = AggregatorCoordinator() + + await exploration_aggregator.initialize( + user_prompt=exploration_prompt, + submitter_configs=self._submitter_configs, + validator_model=self._validator_model, + user_files=[], + skip_stats_load=True, + validator_context_window=self._validator_context, + validator_max_tokens=self._validator_max_tokens, + 
validator_provider=self._validator_provider, + validator_openrouter_provider=self._validator_openrouter_provider, + validator_lm_studio_fallback=self._validator_lm_studio_fallback, + enable_cleanup_review=False + ) + + # Set WebSocket broadcaster so aggregator events flow through + if self._broadcast_callback: + exploration_aggregator.websocket_broadcaster = self._broadcast_callback + + # Start the aggregator (parallel submitters + batch validator) + await exploration_aggregator.start() + logger.info("Exploration aggregator started with parallel submitters") + + last_acceptances = 0 + last_rejections = 0 + consecutive_rejections = 0 + + while self._running and not self._stop_event.is_set(): + status = await exploration_aggregator.get_status() + current_acceptances = status.total_acceptances + current_rejections = status.total_rejections + + # Track new acceptances + if current_acceptances > last_acceptances: + consecutive_rejections = 0 + last_acceptances = current_acceptances + + await self._broadcast("topic_exploration_progress", { + "accepted": current_acceptances, + "target": TARGET_CANDIDATES, + "total_attempts": current_acceptances + current_rejections + }) + + await self._save_workflow_state( + tier="tier1_aggregation", + phase="topic_exploration" + ) + + logger.info(f"TopicExploration: {current_acceptances}/{TARGET_CANDIDATES} candidates accepted") + + if current_acceptances >= TARGET_CANDIDATES: + logger.info(f"TopicExploration: Target of {TARGET_CANDIDATES} candidates reached") + break + + # Track consecutive rejections for safety valve + if current_rejections > last_rejections: + new_rejections = current_rejections - last_rejections + consecutive_rejections += new_rejections + last_rejections = current_rejections + + if consecutive_rejections >= MAX_CONSECUTIVE_REJECTIONS: + logger.warning(f"TopicExploration: {consecutive_rejections} consecutive rejections - proceeding with {current_acceptances} candidates") + break + + await asyncio.sleep(2) + + # Stop 
the exploration aggregator + await exploration_aggregator.stop() + + # Read accepted candidates from the exploration database + candidates_text = "" + if exploration_db_path.exists(): + async with aiofiles.open(exploration_db_path, 'r', encoding='utf-8') as f: + raw_content = await f.read() + + if raw_content.strip(): + # Format into the candidate DB structure expected by topic selector + entries = [e.strip() for e in raw_content.split("\n\n") if e.strip()] + lines = [ + "ACCEPTED CANDIDATE BRAINSTORM QUESTIONS:", + "=" * 60 + ] + for i, entry in enumerate(entries, 1): + lines.append(f"\nCandidate #{i}:") + lines.append(f" {entry}") + lines.append("-" * 40) + candidates_text = "\n".join(lines) + + await self._broadcast("topic_exploration_complete", { + "accepted_count": last_acceptances, + "total_attempts": last_acceptances + last_rejections + }) + + logger.info(f"Topic exploration complete: {last_acceptances} candidates accepted") + + return candidates_text + + except FreeModelExhaustedError: + # Stop aggregator if running + if exploration_aggregator: + try: + await exploration_aggregator.stop() + except Exception: + pass + raise + except Exception as e: + logger.error(f"Topic exploration phase error: {e}") + if exploration_aggregator: + try: + await exploration_aggregator.stop() + except Exception: + pass + return "" + finally: + # Restore original shared training path + system_config.shared_training_file = original_shared_path + shared_training_memory.file_path = original_memory_path + + # Clear in-memory data to prevent cross-contamination + async with shared_training_memory._lock: + shared_training_memory.insights.clear() + shared_training_memory.submission_count = 0 + shared_training_memory.last_ragged_submission_count = 0 + logger.info("Exploration: Restored shared_training_memory state") + + # Clean up exploration database file + if exploration_db_path.exists(): + try: + exploration_db_path.unlink() + except Exception: + pass + # 
======================================================================== # PHASE 1: TOPIC SELECTION # ======================================================================== - async def _topic_selection_loop(self) -> Optional[str]: + async def _topic_selection_loop(self, candidate_questions: str = "") -> Optional[str]: """ - Topic selection with validation. + Topic selection with validation. Retries indefinitely with rejection + feedback until a topic is accepted or stop event is set. Returns: - topic_id if successful, None if failed + topic_id if successful, None only if stopped """ - # Set tier state immediately when entering topic selection - # This ensures force_tier3 can reliably detect we're in the Tier 1 phase - # (Previously, state remained "idle" until aggregation loop started, causing race conditions) self._state.current_tier = "tier1_aggregation" - # Set phase for API logging api_client_manager.set_autonomous_phase("topic_selection") - max_attempts = system_config.autonomous_topic_selection_retry_limit - - for attempt in range(max_attempts): - if self._stop_event.is_set(): - return None - - logger.info(f"Topic selection attempt {attempt + 1}/{max_attempts}") + attempt = 0 + while not self._stop_event.is_set(): + attempt += 1 + logger.info(f"Topic selection attempt {attempt}") - # Get context brainstorms_summary = await autonomous_rag_manager.get_all_brainstorms_summary() papers_summary = await autonomous_rag_manager.get_all_papers_summary() - # Generate topic selection submission = await self._topic_selector.select_topic( user_research_prompt=self._user_research_prompt, brainstorms_summary=brainstorms_summary, - papers_summary=papers_summary + papers_summary=papers_summary, + candidate_questions=candidate_questions ) if submission is None: @@ -1589,7 +2020,6 @@ async def _topic_selection_loop(self) -> Optional[str]: await asyncio.sleep(5) continue - # Validate validation = await self._topic_validator.validate( submission=submission, 
user_research_prompt=self._user_research_prompt, @@ -1598,12 +2028,10 @@ async def _topic_selection_loop(self) -> Optional[str]: ) if validation.decision == "accept": - # Check if we should stop before creating new topic if self._stop_event.is_set(): logger.info("Topic selection cancelled - stop event set after validation") return None - # Execute topic selection topic_id = await self._execute_topic_selection(submission) if topic_id: @@ -1614,7 +2042,6 @@ async def _topic_selection_loop(self) -> Optional[str]: }) return topic_id else: - # Handle rejection await self._topic_selector.handle_rejection(submission, validation.reasoning) await research_metadata.increment_stat("topic_selection_rejections") @@ -1624,7 +2051,6 @@ async def _topic_selection_loop(self) -> Optional[str]: logger.info(f"Topic selection rejected: {validation.reasoning[:100]}...") - logger.error(f"Topic selection failed after {max_attempts} attempts") return None async def _execute_topic_selection( @@ -1698,6 +2124,148 @@ async def _execute_topic_selection( logger.error(f"Error executing topic selection: {e}") return None + async def _brainstorm_continuation_decision(self) -> str: + """ + Decide whether to write another paper from the current brainstorm or move on. + Uses topic selector model for submission and topic validator for validation. + + NO RAG BY DESIGN: This is a strategic decision using only brainstorm SUMMARY + (not full DB) and prior paper titles/abstracts/outlines from this brainstorm. + Full brainstorm content is not needed to decide "write another or move on" — + the summary + completed paper metadata is sufficient context. 
+ + Returns: + "write_another_paper" or "move_on" + """ + from backend.shared.json_parser import parse_json + from backend.autonomous.prompts.paper_continuation_prompts import ( + build_continuation_decision_prompt, + build_continuation_validation_prompt + ) + + api_client_manager.set_autonomous_phase("brainstorm_continuation") + + await self._broadcast("brainstorm_continuation_started", { + "topic_id": self._current_topic_id, + "papers_written": self._brainstorm_paper_count + }) + + metadata = await brainstorm_memory.get_metadata(self._current_topic_id) + topic_prompt = metadata.topic_prompt if metadata else "" + + brainstorm_summary = await autonomous_rag_manager.get_brainstorm_summary( + self._current_topic_id + ) + + papers_from_brainstorm = await research_metadata.get_papers_by_brainstorm( + self._current_topic_id + ) + papers_context = [] + for p in papers_from_brainstorm: + paper_id = p.get("paper_id") + outline_text = "" + if paper_id: + outline_path = paper_library.get_outline_path(paper_id) + if os.path.exists(outline_path): + async with aiofiles.open(outline_path, "r", encoding="utf-8") as f: + outline_text = await f.read() + papers_context.append({ + "title": p.get("title", "N/A"), + "abstract": p.get("abstract", "N/A"), + "outline": outline_text + }) + + attempt = 0 + rejection_context = "" + + while not self._stop_event.is_set(): + attempt += 1 + + logger.info(f"Brainstorm continuation decision attempt {attempt}") + + prompt = build_continuation_decision_prompt( + user_research_prompt=self._user_research_prompt, + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + papers_from_brainstorm=papers_context, + papers_written_count=self._brainstorm_paper_count, + rejection_context=rejection_context + ) + + task_id = f"auto_cd_{self._topic_selector.task_sequence:03d}" + self._topic_selector.task_sequence += 1 + + if self._topic_selector.task_tracking_callback: + self._topic_selector.task_tracking_callback("started", task_id) + + try: + 
response = await api_client_manager.generate_completion( + task_id=task_id, + role_id="autonomous_topic_selector", + model=self._topic_selector.model_id, + messages=[{"role": "user", "content": prompt}], + temperature=0.0, + max_tokens=self._topic_selector.max_output_tokens + ) + + content = response.get("choices", [{}])[0].get("message", {}).get("content") or "" + if not content: + msg = response.get("choices", [{}])[0].get("message", {}) + content = msg.get("reasoning") or "" + + result = parse_json(content) + decision = result.get("decision", "move_on") + reasoning = result.get("reasoning", "") + + if decision not in ("write_another_paper", "move_on"): + logger.warning(f"Invalid continuation decision: {decision}, defaulting to move_on") + decision = "move_on" + + if self._topic_selector.task_tracking_callback: + self._topic_selector.task_tracking_callback("completed", task_id) + + proposed = {"decision": decision, "reasoning": reasoning} + + validation = await self._topic_validator.validate( + submission=TopicSelectionSubmission( + action="new_topic", + topic_prompt=f"[CONTINUATION DECISION: {decision}]", + reasoning=reasoning[:200] + ), + user_research_prompt=self._user_research_prompt, + brainstorms_summary=await autonomous_rag_manager.get_all_brainstorms_summary(), + papers_summary=await autonomous_rag_manager.get_all_papers_summary(), + override_prompt=build_continuation_validation_prompt( + user_research_prompt=self._user_research_prompt, + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + papers_from_brainstorm=papers_context, + papers_written_count=self._brainstorm_paper_count, + proposed_decision=proposed + ) + ) + + if validation.decision == "accept": + logger.info(f"Brainstorm continuation decision accepted: {decision}") + await self._broadcast("brainstorm_continuation_decided", { + "topic_id": self._current_topic_id, + "decision": decision, + "paper_count": self._brainstorm_paper_count, + "reasoning": reasoning[:300] + }) + return 
decision + else: + rejection_context = validation.reasoning + logger.info(f"Continuation decision rejected: {validation.reasoning[:100]}...") + + except FreeModelExhaustedError: + raise + except Exception as e: + logger.error(f"Error in continuation decision attempt {attempt}: {e}") + await asyncio.sleep(3) + + return "move_on" + async def _pre_brainstorm_reference_selection(self) -> List[str]: """ Select reference papers BEFORE brainstorming begins. @@ -1895,6 +2463,13 @@ async def paper_model_tracking_callback(model_id: str) -> None: if self._broadcast_callback: self._brainstorm_aggregator.websocket_broadcaster = self._broadcast_callback + # Check if manual override was triggered during initialization + # (force_paper_writing() can fire while RAG ingestion is in progress) + if self._manual_paper_writing_triggered: + logger.info("Manual override detected during initialization - skipping aggregator start") + self._manual_paper_writing_triggered = False + return True + # Start aggregator await self._brainstorm_aggregator.start() logger.info(f"Aggregator started for brainstorm {self._current_topic_id}") @@ -1937,6 +2512,11 @@ async def paper_model_tracking_callback(model_id: str) -> None: # Track cleanup removals for status display if current_cleanup_removals != self._cleanup_removals: self._cleanup_removals = current_cleanup_removals + # Update brainstorm metadata with live count (accounts for prune) + await brainstorm_memory.update_metadata( + self._current_topic_id, + submission_count=status.shared_training_size + ) # Track new acceptances/rejections if current_acceptances > last_acceptances: @@ -1948,10 +2528,10 @@ async def paper_model_tracking_callback(model_id: str) -> None: # Increment total submissions accepted stat for acceptance rate calculation await research_metadata.increment_stat("total_submissions_accepted", new_acceptances) - # Update brainstorm metadata + # Update brainstorm metadata with live count (accounts for prune) await 
brainstorm_memory.update_metadata( self._current_topic_id, - submission_count=current_acceptances + submission_count=status.shared_training_size ) # NOTE: Don't broadcast here - the aggregator already broadcasts @@ -1964,15 +2544,15 @@ async def paper_model_tracking_callback(model_id: str) -> None: if current_acceptances % 5 == 0: await self._save_workflow_state(tier="tier1_aggregation") - # Check for hard limit of 80 acceptances (FORCE paper writing, skip completion review) - if self._acceptance_count >= 80: - logger.info(f"Hard limit of 80 acceptances reached for {self._current_topic_id}. Forcing paper writing transition.") + # Check for hard limit of 30 acceptances (FORCE paper writing, skip completion review) + if self._acceptance_count >= 30: + logger.info(f"Hard limit of 30 acceptances reached for {self._current_topic_id}. Forcing paper writing transition.") # Broadcast hard limit reached event await self._broadcast("brainstorm_hard_limit_reached", { "topic_id": self._current_topic_id, "acceptance_count": self._acceptance_count, - "message": "Brainstorm hard limit of 80 acceptances reached. Forcing paper writing." + "message": "Brainstorm hard limit of 30 acceptances reached. Forcing paper writing." 
}) # Mark brainstorm complete @@ -2004,6 +2584,7 @@ async def paper_model_tracking_callback(model_id: str) -> None: if self._manual_paper_writing_triggered: logger.info("Manual override detected - transitioning to paper writing") self._manual_paper_writing_triggered = False + await self._brainstorm_aggregator.stop() return True # Track consecutive rejections and increment total rejections stat @@ -2020,7 +2601,7 @@ async def paper_model_tracking_callback(model_id: str) -> None: # individual 'submission_rejected' events with submitter_id per submission # Check for hard limit of 10 consecutive rejections (with minimum 5 acceptances) - # This FORCES paper writing, similar to the 80 acceptance hard limit + # This FORCES paper writing, similar to the 30 acceptance hard limit if self._consecutive_rejections >= 10 and self._acceptance_count >= 5: logger.info(f"Hard limit: {self._consecutive_rejections} consecutive rejections with {self._acceptance_count} acceptances. Forcing paper writing.") @@ -2389,7 +2970,7 @@ async def _run_completion_review(self) -> bool: # PHASE 3: PAPER COMPILATION # ======================================================================== - async def _paper_compilation_workflow(self) -> bool: + async def _paper_compilation_workflow(self, skip_reference_selection: bool = False) -> bool: """ Complete paper compilation workflow. Order: Reference selection -> Title -> Body -> Conclusion -> Intro -> Abstract @@ -2397,6 +2978,10 @@ async def _paper_compilation_workflow(self) -> bool: Supports RESUME: If self._current_paper_id is already set, skips title/reference selection and continues paper compilation where it left off. + Args: + skip_reference_selection: If True, skip reference selection (for paper 2/3 + from same brainstorm - reuses existing references). + Returns: True if paper was successfully compiled, False otherwise. 
""" @@ -2445,14 +3030,37 @@ async def _paper_compilation_workflow(self) -> bool: }) else: # FRESH START: Run full title/reference selection workflow - # Step 1: Reference selection (if papers exist) - reference_paper_ids = await self._reference_selection_workflow() + # Step 1: Reference selection (if papers exist) - skip for continuation papers + if skip_reference_selection: + reference_paper_ids = self._current_reference_papers + logger.info(f"Skipping reference selection (continuation paper), using {len(reference_paper_ids)} existing references") + else: + reference_paper_ids = await self._reference_selection_workflow() + + if self._stop_event.is_set(): + return False + + # Step 2: Paper title exploration (collect 5 candidate titles) + metadata = await brainstorm_memory.get_metadata(self._current_topic_id) + topic_prompt = metadata.topic_prompt if metadata else "" + brainstorm_summary = await autonomous_rag_manager.get_brainstorm_summary( + self._current_topic_id + ) + existing_papers = await research_metadata.get_papers_by_brainstorm( + self._current_topic_id + ) + + candidate_titles = await self._paper_title_exploration_phase( + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + existing_papers=existing_papers + ) if self._stop_event.is_set(): return False - # Step 2: Title selection - paper_title = await self._paper_title_selection() + # Step 3: Final title selection (informed by candidate titles) + paper_title = await self._paper_title_selection(candidate_titles=candidate_titles) if paper_title is None: logger.error("Paper title selection failed") @@ -2512,7 +3120,7 @@ async def _paper_compilation_workflow(self) -> bool: title=paper_title, content=paper_content, outline=final_outline or "[Outline not available]", - reference_paper_ids=reference_paper_ids + reference_paper_ids=reference_paper_ids + self._current_brainstorm_paper_ids ) return True @@ -2601,8 +3209,8 @@ async def _reference_selection_workflow(self) -> List[str]: 
logger.info(f"Additional reference selection: {len(additional_ids)} new + {len(already_selected)} existing = {len(combined)} total") return combined - async def _paper_title_selection(self) -> Optional[str]: - """Select paper title.""" + async def _paper_title_selection(self, candidate_titles: str = "") -> Optional[str]: + """Select paper title, optionally informed by candidate titles from exploration.""" metadata = await brainstorm_memory.get_metadata(self._current_topic_id) if metadata is None: return None @@ -2623,11 +3231,198 @@ async def _paper_title_selection(self) -> Optional[str]: topic_prompt=metadata.topic_prompt, brainstorm_summary=brainstorm_summary, existing_papers_from_brainstorm=existing_papers, + candidate_titles=candidate_titles, stop_event=self._stop_event ) return title + async def _paper_title_exploration_phase( + self, + topic_prompt: str = "", + brainstorm_summary: str = "", + existing_papers: list = None, + reference_papers: list = None + ) -> str: + """ + Paper title exploration phase using the full Part 1 aggregator infrastructure. + Collects 5 validated candidate titles before final title selection. + Mirrors _topic_exploration_phase() structure exactly. + + Args: + topic_prompt: Brainstorm topic, Tier 3 context, or chapter brief. + brainstorm_summary: Summary of the source material the paper will draw from. + existing_papers: Completed papers that new title must not duplicate. + reference_papers: Reference papers informing this paper. + + Returns: + Formatted candidate titles string for injection into the final title selection prompt. 
+ """ + api_client_manager.set_autonomous_phase("paper_title_exploration") + + TARGET_CANDIDATES = 5 + MAX_CONSECUTIVE_REJECTIONS = 15 + + await self._broadcast("paper_title_exploration_started", { + "target": TARGET_CANDIDATES, + "resumed_count": 0 + }) + + logger.info(f"Starting paper title exploration phase (target: {TARGET_CANDIDATES} candidates)") + + # Build the exploration user prompt for the aggregator + from backend.autonomous.prompts.paper_title_exploration_prompts import build_title_exploration_user_prompt + + exploration_prompt = build_title_exploration_user_prompt( + user_research_prompt=self._user_research_prompt, + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + existing_papers_from_brainstorm=existing_papers or [], + reference_papers=reference_papers + ) + + # Create a temp title candidates database file in the brainstorms directory + topic_suffix = self._current_topic_id or "tier3" + title_db_path = brainstorm_memory._base_dir / f"title_candidates_{topic_suffix}.txt" + title_db_path.parent.mkdir(parents=True, exist_ok=True) + + # Clear any stale title candidates DB + if title_db_path.exists(): + title_db_path.unlink() + + # Override shared training memory path for title exploration + original_shared_path = system_config.shared_training_file + system_config.shared_training_file = str(title_db_path) + original_memory_path = shared_training_memory.file_path + shared_training_memory.file_path = title_db_path + await shared_training_memory.reload_insights_from_current_path() + + exploration_aggregator = None + + try: + exploration_aggregator = AggregatorCoordinator() + + await exploration_aggregator.initialize( + user_prompt=exploration_prompt, + submitter_configs=self._submitter_configs, + validator_model=self._validator_model, + user_files=[], + skip_stats_load=True, + validator_context_window=self._validator_context, + validator_max_tokens=self._validator_max_tokens, + validator_provider=self._validator_provider, + 
validator_openrouter_provider=self._validator_openrouter_provider, + validator_lm_studio_fallback=self._validator_lm_studio_fallback, + enable_cleanup_review=False + ) + + if self._broadcast_callback: + exploration_aggregator.websocket_broadcaster = self._broadcast_callback + + await exploration_aggregator.start() + logger.info("Title exploration aggregator started with parallel submitters") + + last_acceptances = 0 + last_rejections = 0 + consecutive_rejections = 0 + + while self._running and not self._stop_event.is_set(): + status = await exploration_aggregator.get_status() + current_acceptances = status.total_acceptances + current_rejections = status.total_rejections + + if current_acceptances > last_acceptances: + consecutive_rejections = 0 + last_acceptances = current_acceptances + + await self._broadcast("paper_title_exploration_progress", { + "accepted": current_acceptances, + "target": TARGET_CANDIDATES, + "total_attempts": current_acceptances + current_rejections + }) + + await self._save_workflow_state( + tier=self._state.current_tier, + phase="paper_title_exploration" + ) + + logger.info(f"TitleExploration: {current_acceptances}/{TARGET_CANDIDATES} candidates accepted") + + if current_acceptances >= TARGET_CANDIDATES: + logger.info(f"TitleExploration: Target of {TARGET_CANDIDATES} candidates reached") + break + + if current_rejections > last_rejections: + new_rejections = current_rejections - last_rejections + consecutive_rejections += new_rejections + last_rejections = current_rejections + + if consecutive_rejections >= MAX_CONSECUTIVE_REJECTIONS: + logger.warning(f"TitleExploration: {consecutive_rejections} consecutive rejections - proceeding with {current_acceptances} candidates") + break + + await asyncio.sleep(2) + + await exploration_aggregator.stop() + + # Read accepted candidates from the title candidates database + candidates_text = "" + if title_db_path.exists(): + async with aiofiles.open(title_db_path, 'r', encoding='utf-8') as f: + 
raw_content = await f.read() + + if raw_content.strip(): + entries = [e.strip() for e in raw_content.split("\n\n") if e.strip()] + lines = [ + "VALIDATED CANDIDATE TITLES:", + "=" * 60 + ] + for i, entry in enumerate(entries, 1): + lines.append(f"\nCandidate Title #{i}:") + lines.append(f" {entry}") + lines.append("-" * 40) + candidates_text = "\n".join(lines) + + await self._broadcast("paper_title_exploration_complete", { + "accepted_count": last_acceptances, + "total_attempts": last_acceptances + last_rejections + }) + + logger.info(f"Paper title exploration complete: {last_acceptances} candidates accepted") + + return candidates_text + + except FreeModelExhaustedError: + if exploration_aggregator: + try: + await exploration_aggregator.stop() + except Exception: + pass + raise + except Exception as e: + logger.error(f"Paper title exploration phase error: {e}") + if exploration_aggregator: + try: + await exploration_aggregator.stop() + except Exception: + pass + return "" + finally: + system_config.shared_training_file = original_shared_path + shared_training_memory.file_path = original_memory_path + + async with shared_training_memory._lock: + shared_training_memory.insights.clear() + shared_training_memory.submission_count = 0 + shared_training_memory.last_ragged_submission_count = 0 + logger.info("TitleExploration: Restored shared_training_memory state") + + if title_db_path.exists(): + try: + title_db_path.unlink() + except Exception: + pass + async def _compile_paper( self, paper_id: str, @@ -2654,6 +3449,18 @@ async def _compile_paper( else: logger.info(f"Compiling paper: {paper_title}") + # Propagate compiler context/token settings to system_config BEFORE creating CompilerCoordinator. + # The compiler modules read from system_config at init time; only the manual /api/compiler/start + # route sets these, so autonomous mode must do it explicitly. 
+ system_config.compiler_validator_context_window = self._validator_context + system_config.compiler_validator_max_output_tokens = self._validator_max_tokens + system_config.compiler_high_context_context_window = self._high_context_context + system_config.compiler_high_context_max_output_tokens = self._high_context_max_tokens + system_config.compiler_high_param_context_window = self._high_param_context + system_config.compiler_high_param_max_output_tokens = self._high_param_max_tokens + system_config.compiler_critique_submitter_context_window = self._critique_submitter_context + system_config.compiler_critique_submitter_max_tokens = self._critique_submitter_max_tokens + # Initialize compiler for this paper self._paper_compiler = CompilerCoordinator() @@ -2696,8 +3503,10 @@ async def _compile_paper( # Enable autonomous section order constraint self._paper_compiler.enable_autonomous_mode() - - # CRITICAL: Restore the saved phase when resuming + self._paper_compiler._current_topic_id = self._current_topic_id + self._paper_compiler._current_reference_paper_ids = list(dict.fromkeys( + reference_paper_ids + self._current_brainstorm_paper_ids + )) # enable_autonomous_mode() sets phase to "body" by default # But when resuming, we need to continue from where we left off if is_resume and resume_phase: @@ -2764,6 +3573,21 @@ async def _compile_paper( logger.warning(f"Reference paper not found: {paper_path}") logger.info("All reference papers loaded into compiler RAG") + # Load prior brainstorm papers as auto-references (for paper 2/3 from same brainstorm) + if self._current_brainstorm_paper_ids: + logger.info(f"Loading {len(self._current_brainstorm_paper_ids)} prior brainstorm papers as auto-references") + for bp_id in self._current_brainstorm_paper_ids: + bp_path = paper_library.get_paper_path(bp_id) + if os.path.exists(bp_path): + await rag_manager.add_document( + bp_path, + chunk_sizes=[512], + is_user_file=True + ) + logger.info(f"Prior brainstorm paper loaded as 
auto-reference: {bp_id}") + else: + logger.warning(f"Prior brainstorm paper not found: {bp_path}") + # Start compiler await self._paper_compiler.start() logger.info(f"Compiler started for paper {paper_id}") @@ -2974,11 +3798,9 @@ async def _handle_paper_completion( # Only clear paper state if marking as complete if mark_complete: - # Clear paper-specific workflow state (paper is complete) + self._last_completed_paper_id = self._current_paper_id self._current_paper_id = None self._current_paper_title = None - - # Clear per-paper model tracking (will be re-initialized for next paper) self._current_paper_tracker = None await self._save_workflow_state(tier=None, phase=None) @@ -3002,7 +3824,6 @@ async def _auto_generate_paper_critique( from backend.shared.critique_prompts import build_critique_prompt from backend.shared.critique_memory import save_critique from backend.shared.api_client_manager import api_client_manager - from backend.shared.json_parser import parse_json from backend.shared.utils import count_tokens from backend.shared.models import PaperCritique, ModelConfig import uuid @@ -3080,21 +3901,9 @@ async def _auto_generate_paper_critique( logger.error(f"Empty response from validator model for paper {paper_id}") return - # Parse JSON - try: - critique_data = parse_json(response_content) - except Exception as e: - logger.warning(f"Failed to parse critique JSON for paper {paper_id}: {e}") - # Create fallback structure - critique_data = { - "novelty_rating": 0, - "novelty_feedback": "Unable to parse structured response", - "correctness_rating": 0, - "correctness_feedback": "Unable to parse structured response", - "impact_rating": 0, - "impact_feedback": "Unable to parse structured response", - "full_critique": response_content - } + # Parse JSON with lenient fallback for truncated responses + from backend.shared.critique_prompts import parse_critique_response + critique_data = parse_critique_response(response_content) # Extract ratings novelty = 
critique_data.get("novelty_rating", 0) @@ -3998,9 +4807,8 @@ async def _tier3_title_selection( """ Select a title for the Tier 3 final answer paper. The title should directly and transparently answer the user's question. + Runs paper title exploration first to collect 5 candidate titles. """ - from backend.autonomous.prompts.final_answer_prompts import build_final_paper_title_prompt - # Get reference paper details reference_details = [] for paper_id in reference_papers: @@ -4013,12 +4821,27 @@ async def _tier3_title_selection( "abstract": metadata.abstract }) - # Use the existing title selector with special context + # Run title exploration phase for Tier 3 + topic_prompt = f"[TIER 3 FINAL ANSWER] Certainty: {assessment.certainty_level}" + brainstorm_summary = f"Known Certainties:\n{assessment.known_certainties_summary}" + + candidate_titles = await self._paper_title_exploration_phase( + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + existing_papers=[], + reference_papers=reference_details + ) + + if self._stop_event.is_set(): + return None + + # Use the existing title selector with special context + candidate titles title = await self._title_selector.select_title( user_research_prompt=self._user_research_prompt, - topic_prompt=f"[TIER 3 FINAL ANSWER] Certainty: {assessment.certainty_level}", - brainstorm_summary=f"Known Certainties:\n{assessment.known_certainties_summary}", - existing_papers_from_brainstorm=[], # No previous papers for this "brainstorm" + topic_prompt=topic_prompt, + brainstorm_summary=brainstorm_summary, + existing_papers_from_brainstorm=[], + candidate_titles=candidate_titles, stop_event=self._stop_event ) @@ -4037,6 +4860,17 @@ async def _compile_tier3_paper( """ logger.info(f"Compiling Tier 3 paper: {paper_title}") + # Propagate compiler context/token settings to system_config BEFORE creating CompilerCoordinator. + # Same as in _compile_paper_from_brainstorm — compiler modules read from system_config at init. 
+ system_config.compiler_validator_context_window = self._validator_context + system_config.compiler_validator_max_output_tokens = self._validator_max_tokens + system_config.compiler_high_context_context_window = self._high_context_context + system_config.compiler_high_context_max_output_tokens = self._high_context_max_tokens + system_config.compiler_high_param_context_window = self._high_param_context + system_config.compiler_high_param_max_output_tokens = self._high_param_max_tokens + system_config.compiler_critique_submitter_context_window = self._critique_submitter_context + system_config.compiler_critique_submitter_max_tokens = self._critique_submitter_max_tokens + # Initialize compiler for this paper self._paper_compiler = CompilerCoordinator() @@ -4076,6 +4910,7 @@ async def _compile_tier3_paper( # Enable autonomous mode self._paper_compiler.enable_autonomous_mode() + self._paper_compiler._current_reference_paper_ids = list(reference_paper_ids) # Clear any previous paper/outline await self._paper_compiler.clear_paper() @@ -4147,13 +4982,10 @@ async def _write_volume_chapter( # Determine context based on chapter type if chapter.chapter_type == "introduction": - # Introduction is written LAST - has access to all chapters context = "Write the INTRODUCTION for this volume. You have access to ALL chapters." elif chapter.chapter_type == "conclusion": - # Conclusion is written second-to-last context = "Write the CONCLUSION for this volume. Synthesize findings from all body chapters." 
else: - # Gap paper - fills content gap context = f"Write a paper to fill this content gap: {chapter.description}" # Get reference papers (existing papers in the volume) @@ -4162,6 +4994,36 @@ async def _write_volume_chapter( if ch.chapter_type == "existing_paper" and ch.paper_id ] + # Run title exploration for this chapter + ref_details = [] + for pid in reference_ids: + meta = await paper_library.get_metadata(pid) + if meta: + ref_details.append({"paper_id": pid, "title": meta.title, "abstract": meta.abstract}) + + candidate_titles = await self._paper_title_exploration_phase( + topic_prompt=f"[VOLUME CHAPTER: {chapter.chapter_type}] {context}", + brainstorm_summary=f"Known Certainties:\n{assessment.known_certainties_summary}", + existing_papers=[], + reference_papers=ref_details + ) + + if self._stop_event.is_set(): + return False + + # Select chapter title from candidates + chapter_title = await self._title_selector.select_title( + user_research_prompt=self._user_research_prompt, + topic_prompt=f"[VOLUME CHAPTER: {chapter.chapter_type}] {context}", + brainstorm_summary=f"Known Certainties:\n{assessment.known_certainties_summary}", + existing_papers_from_brainstorm=[], + candidate_titles=candidate_titles, + stop_event=self._stop_event + ) + + if chapter_title: + chapter.title = chapter_title + # Compile the chapter paper chapter_paper_id = f"volume_ch{chapter.order:02d}_{chapter.chapter_type}" @@ -4191,7 +5053,7 @@ async def _write_volume_chapter( async def clear_all_data(self) -> None: """Clear all autonomous research data. - Clears brainstorms, papers, metadata, RAG state, and session data. + Clears brainstorms, papers, metadata, API logs, RAG state, and session data. Uses graceful degradation: distinguishes critical vs non-critical failures. 
""" # Check both internal flag and state object @@ -4303,7 +5165,17 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: errors.append(f"Failed to clear autonomous rejection logs: {e}") logger.warning(errors[-1]) - # Step 6: Clear RAG state (removes indexed brainstorm/paper content) + # Step 6: Clear autonomous API logs + try: + await autonomous_api_logger.clear_logs() + successes.append("Cleared autonomous API logs") + logger.info("Cleared autonomous API logs") + except Exception as e: + # Non-critical: API logs can be regenerated + errors.append(f"Failed to clear autonomous API logs: {e}") + logger.warning(errors[-1]) + + # Step 7: Clear RAG state (removes indexed brainstorm/paper content) try: # Wait a moment for any pending RAG operations to complete await asyncio.sleep(0.5) @@ -4317,7 +5189,7 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: critical_errors.append(f"Failed to clear RAG state: {e}") logger.error(critical_errors[-1]) - # Step 7: Reset internal state + # Step 8: Reset internal state self._current_topic_id = None self._current_paper_id = None self._current_paper_title = None @@ -4335,11 +5207,14 @@ def safe_rmtree(path: Path, max_retries: int = 5) -> bool: self._force_tier3_immediate = False self._tier3_active = False self._last_tier3_check_at = 0 + self._brainstorm_paper_count = 0 + self._current_brainstorm_paper_ids = [] + self._last_completed_paper_id = None - # Step 8: Reset state object + # Step 9: Reset state object self._state = AutonomousResearchState() - # Step 9: Clear session manager state + # Step 10: Clear session manager state try: await session_manager.clear() successes.append("Cleared session manager state") @@ -4389,10 +5264,11 @@ async def refresh_workflow_predictions(self) -> None: tasks = list(self._paper_compiler.workflow_tasks) else: - # Topic selection/idle phase - use autonomous agents' sequences + # Topic selection phase (exploration uses aggregator with its own predictions) ts_seq = 
self._topic_selector.task_sequence if self._topic_selector else 0 tv_seq = self._topic_validator.task_sequence if self._topic_validator else 0 + # 20 slots: topic selection (submit/validate pairs) for i in range(20): if i % 2 == 0: task_id = f"auto_ts_{ts_seq:03d}" diff --git a/backend/autonomous/core/autonomous_rag_manager.py b/backend/autonomous/core/autonomous_rag_manager.py index 22fc4f5..546018a 100644 --- a/backend/autonomous/core/autonomous_rag_manager.py +++ b/backend/autonomous/core/autonomous_rag_manager.py @@ -69,7 +69,8 @@ async def get_brainstorm_context( self, topic_id: str, max_tokens: int = 50000, - query: str = "" + query: str = "", + exclude_sources: Optional[List[str]] = None ) -> Tuple[str, bool]: """ Get brainstorm database content for context. @@ -82,6 +83,7 @@ async def get_brainstorm_context( topic_id: Topic ID to get context for max_tokens: Maximum tokens available for this content query: Query for RAG retrieval if needed (e.g., user research prompt) + exclude_sources: Source names to skip during RAG packing Returns: Tuple of (content string, used_rag boolean) @@ -112,7 +114,8 @@ async def get_brainstorm_context( context_pack = await rag_manager.retrieve( query=query, chunk_size=rag_config.validator_chunk_size, # 512 for consistency - max_tokens=max_tokens + max_tokens=max_tokens, + exclude_sources=exclude_sources ) if context_pack and context_pack.text: @@ -190,7 +193,8 @@ async def get_reference_papers_context( paper_ids: List[str], max_total_tokens: int = 60000, query: str = "", - include_outlines: bool = True + include_outlines: bool = True, + exclude_sources: Optional[List[str]] = None ) -> Tuple[str, bool]: """ Get reference papers content for context. 
@@ -203,6 +207,7 @@ async def get_reference_papers_context( paper_ids: List of paper IDs to include max_total_tokens: Maximum tokens for all reference papers combined query: Query for RAG retrieval if needed + exclude_sources: Source names to skip during RAG packing Returns: Tuple of (content string, used_rag boolean) @@ -276,7 +281,8 @@ async def get_reference_papers_context( context_pack = await rag_manager.retrieve( query=enhanced_query, chunk_size=rag_config.validator_chunk_size, - max_tokens=max_total_tokens + max_tokens=max_total_tokens, + exclude_sources=exclude_sources ) if context_pack and context_pack.text: @@ -304,16 +310,31 @@ async def get_reference_papers_context( async def _ensure_paper_indexed(self, paper_id: str, content: str, title: str) -> None: """Ensure paper content is indexed in RAG for retrieval.""" source_name = f"reference_paper_{paper_id}" + has_document_entry = source_name in rag_manager.document_access_order + has_validator_chunks = any( + chunk.source_file == source_name + for chunk in rag_manager.chunks_by_size[rag_config.validator_chunk_size] + ) + + if paper_id in self._papers_indexed and has_document_entry and has_validator_chunks: + return try: - # Check if already indexed (by checking if source exists) - # Add to RAG + # If the tracking set says this paper was indexed but its active RAG entry + # has been evicted, remove any partial remnants and rebuild it. 
+ if paper_id in self._papers_indexed: + self._papers_indexed.discard(paper_id) + + if has_document_entry: + await rag_manager.remove_document(source_name) + await rag_manager.add_text( content, source_name, chunk_sizes=rag_config.submitter_chunk_intervals, is_permanent=False ) + self._papers_indexed.add(paper_id) logger.debug(f"Indexed reference paper {paper_id}: {title}") except Exception as e: @@ -393,6 +414,7 @@ async def prepare_compiler_context( # RAG query for retrievals rag_query = query or f"mathematical research paper compilation" + rag_exclude_sources: List[str] = [] # Priority 1: Brainstorm database (highest priority after outline) brainstorm_budget = int(remaining_budget * 0.5) # Allocate 50% to brainstorm @@ -405,6 +427,13 @@ async def prepare_compiler_context( context["use_rag_for_brainstorm"] = used_rag brainstorm_tokens = count_tokens(brainstorm_content) remaining_budget -= brainstorm_tokens + + # If brainstorm was direct-injected, exclude its RAG sources from later retrievals. 
+ if brainstorm_content and not used_rag: + rag_exclude_sources.extend([ + f"brainstorm_{topic_id}", + f"brainstorm_{topic_id}.txt" + ]) # Priority 2: Current paper progress paper_tokens = count_tokens(current_paper) if current_paper else 0 @@ -415,6 +444,7 @@ async def prepare_compiler_context( context["current_paper"] = current_paper remaining_budget -= paper_tokens logger.debug(f"Compiler context: Paper direct injection ({paper_tokens} tokens)") + rag_exclude_sources.append("compiler_current_paper") elif paper_tokens > 0: # Paper doesn't fit - use RAG context["use_rag_for_papers"] = True @@ -423,7 +453,8 @@ async def prepare_compiler_context( paper_pack = await rag_manager.retrieve( query=rag_query, chunk_size=rag_config.validator_chunk_size, - max_tokens=paper_budget + max_tokens=paper_budget, + exclude_sources=list(dict.fromkeys(rag_exclude_sources)) if rag_exclude_sources else None ) if paper_pack and paper_pack.text: @@ -442,7 +473,8 @@ async def prepare_compiler_context( ref_content, ref_used_rag = await self.get_reference_papers_context( reference_paper_ids, max_total_tokens=remaining_budget, - query=rag_query + query=rag_query, + exclude_sources=list(dict.fromkeys(rag_exclude_sources)) if rag_exclude_sources else None ) context["reference_papers"] = ref_content context["use_rag_for_reference"] = ref_used_rag diff --git a/backend/autonomous/memory/brainstorm_memory.py b/backend/autonomous/memory/brainstorm_memory.py index b8a0d64..65bb4ae 100644 --- a/backend/autonomous/memory/brainstorm_memory.py +++ b/backend/autonomous/memory/brainstorm_memory.py @@ -168,6 +168,22 @@ async def add_paper_reference(self, topic_id: str, paper_id: str) -> Optional[Br await self._save_metadata(metadata) return metadata + + async def remove_paper_reference(self, topic_id: str, paper_id: str) -> Optional[BrainstormMetadata]: + """Remove a paper reference from the brainstorm metadata if it exists.""" + metadata = await self.get_metadata(topic_id) + if metadata is None: + return 
None + + if paper_id in metadata.papers_generated: + metadata.papers_generated = [ + existing_paper_id + for existing_paper_id in metadata.papers_generated + if existing_paper_id != paper_id + ] + await self._save_metadata(metadata) + + return metadata async def get_all_brainstorms(self) -> List[BrainstormMetadata]: """Get metadata for all brainstorm topics.""" @@ -279,6 +295,156 @@ async def get_submissions_list(self, topic_id: str) -> List[Dict[str, Any]]: return submissions + # ======================================================================== + # RETROACTIVE CORRECTION OPERATIONS (used during paper compilation) + # ======================================================================== + + async def edit_submission(self, topic_id: str, submission_number: int, new_content: str) -> bool: + """ + Edit an existing submission's content in the brainstorm database. + Preserves submission number and updates timestamp. + """ + async with self._lock: + db_path = self._get_database_path(topic_id) + if not db_path.exists(): + logger.error(f"Brainstorm database not found for edit: {topic_id}") + return False + + try: + submissions = await self._parse_submissions_unlocked(db_path) + found = False + for sub in submissions: + if sub['number'] == submission_number: + sub['content'] = new_content + sub['timestamp'] = datetime.now().isoformat() + found = True + break + + if not found: + logger.warning(f"Submission #{submission_number} not found in brainstorm {topic_id}") + return False + + await self._write_submissions_unlocked(db_path, submissions) + logger.info(f"Retroactive edit: submission #{submission_number} in brainstorm {topic_id}") + return True + except Exception as e: + logger.error(f"Failed to edit submission #{submission_number} in {topic_id}: {e}") + return False + + async def remove_submission(self, topic_id: str, submission_number: int) -> bool: + """ + Remove a submission from the brainstorm database. + Does not renumber remaining submissions. 
+ """ + async with self._lock: + db_path = self._get_database_path(topic_id) + if not db_path.exists(): + logger.error(f"Brainstorm database not found for removal: {topic_id}") + return False + + try: + submissions = await self._parse_submissions_unlocked(db_path) + original_count = len(submissions) + submissions = [s for s in submissions if s['number'] != submission_number] + + if len(submissions) == original_count: + logger.warning(f"Submission #{submission_number} not found in brainstorm {topic_id}") + return False + + await self._write_submissions_unlocked(db_path, submissions) + + metadata = await self.get_metadata(topic_id) + if metadata: + metadata.submission_count = len(submissions) + metadata.last_activity = datetime.now() + await self._save_metadata(metadata) + + logger.info(f"Retroactive removal: submission #{submission_number} from brainstorm {topic_id}") + return True + except Exception as e: + logger.error(f"Failed to remove submission #{submission_number} from {topic_id}: {e}") + return False + + async def add_submission_retroactive(self, topic_id: str, content: str) -> Optional[int]: + """ + Add a new submission discovered during paper compilation. + Returns the new submission number, or None on failure. 
+ """ + async with self._lock: + db_path = self._get_database_path(topic_id) + if not db_path.exists(): + logger.error(f"Brainstorm database not found for retroactive add: {topic_id}") + return None + + try: + submissions = await self._parse_submissions_unlocked(db_path) + max_number = max((s['number'] for s in submissions), default=0) + new_number = max_number + 1 + + submissions.append({ + 'number': new_number, + 'timestamp': datetime.now().isoformat(), + 'content': content + }) + + await self._write_submissions_unlocked(db_path, submissions) + + metadata = await self.get_metadata(topic_id) + if metadata: + metadata.submission_count = len(submissions) + metadata.last_activity = datetime.now() + await self._save_metadata(metadata) + + logger.info(f"Retroactive add: submission #{new_number} to brainstorm {topic_id}") + return new_number + except Exception as e: + logger.error(f"Failed to retroactively add submission to {topic_id}: {e}") + return None + + async def _parse_submissions_unlocked(self, db_path: Path) -> List[Dict[str, Any]]: + """Parse submissions from a brainstorm database file. Caller must hold lock.""" + import re + async with aiofiles.open(db_path, 'r', encoding='utf-8') as f: + content = await f.read() + + if not content.strip(): + return [] + + submissions = [] + parts = content.split("=" * 80) + + for i, part in enumerate(parts): + if "SUBMISSION #" in part: + lines = part.strip().split("\n") + header = lines[0] if lines else "" + match = re.search(r'SUBMISSION #(\d+) \| Accepted: (.+)', header) + if match: + sub_num = int(match.group(1)) + timestamp = match.group(2).strip() + content_text = "" + if i + 1 < len(parts): + content_text = parts[i + 1].strip() + submissions.append({ + 'number': sub_num, + 'timestamp': timestamp, + 'content': content_text + }) + + return submissions + + async def _write_submissions_unlocked(self, db_path: Path, submissions: List[Dict[str, Any]]) -> None: + """Write submissions back to a brainstorm database file. 
Caller must hold lock.""" + formatted_sections = [] + separator = '=' * 80 + + for sub in submissions: + section = f"{separator}\nSUBMISSION #{sub['number']} | Accepted: {sub['timestamp']}\n{separator}\n\n{sub['content']}\n" + formatted_sections.append(section) + + full_content = '\n\n'.join(formatted_sections) + async with aiofiles.open(db_path, 'w', encoding='utf-8') as f: + await f.write(full_content) + # ======================================================================== # REJECTION LOG OPERATIONS # ======================================================================== diff --git a/backend/autonomous/memory/final_answer_memory.py b/backend/autonomous/memory/final_answer_memory.py index 700ef67..4b58ae0 100644 --- a/backend/autonomous/memory/final_answer_memory.py +++ b/backend/autonomous/memory/final_answer_memory.py @@ -550,7 +550,7 @@ def get_author_attribution_text(self) -> str: "=" * 80, "AUTONOMOUS AI SOLUTION", "", - "Disclaimer: This content is for informational purposes only. This paper was autonomously generated with the novelty-seeking MOTO harness without peer review or user oversight beyond the original prompt. AI-generated content may contain fabricated or unverified claims presented with high confidence. All content should be viewed with extreme scrutiny and independently verified before use.", + "Disclaimer: This content is provided for informational and experimental purposes only. This paper was autonomously generated with the novelty-seeking MOTO harness without peer review or user oversight beyond the original prompt. It may contain incorrect, incomplete, misleading, or fabricated claims presented with high confidence. Use of this content is at your own risk. 
You are solely responsible for reviewing and independently verifying any output before relying on it, and the developers, operators, and contributors are not responsible for errors, omissions, decisions made from this content, or any resulting loss, damage, cost, or liability.", "", f"User's Research Prompt: {display_prompt}", "", diff --git a/backend/autonomous/memory/paper_library.py b/backend/autonomous/memory/paper_library.py index 6ccef5a..aa009af 100644 --- a/backend/autonomous/memory/paper_library.py +++ b/backend/autonomous/memory/paper_library.py @@ -67,6 +67,16 @@ def get_paper_path(self, paper_id: str) -> str: """ return str(self._get_paper_path(paper_id)) + def get_outline_path(self, paper_id: str) -> str: + """ + Public method to get path to paper outline file. + Uses session-aware path resolution. + + Returns: + str: Absolute path to the outline file + """ + return str(self._get_outline_path(paper_id)) + def _get_abstract_path(self, paper_id: str) -> Path: """Get path to abstract file.""" return self._base_dir / f"paper_{paper_id}_abstract.txt" @@ -86,6 +96,173 @@ def _get_metadata_path(self, paper_id: str) -> Path: def _get_rejections_path(self, paper_id: str) -> Path: """Get path to paper compiler rejections file.""" return self._base_dir / f"paper_{paper_id}_last_10_rejections.txt" + + # ======================================================================== + # HISTORY HELPERS + # ======================================================================== + + @staticmethod + def _build_scoped_library(base_dir: Path) -> "PaperLibrary": + """Create a temporary paper library instance rooted at a specific directory.""" + scoped_library = PaperLibrary() + scoped_library._base_dir = base_dir + scoped_library._archive_dir = base_dir / "archive" + return scoped_library + + def get_history_papers_dir(self, session_id: str) -> Optional[Path]: + """Resolve the papers directory for a history session.""" + if session_id == "legacy": + papers_dir = 
Path(system_config.auto_papers_dir) + return papers_dir if papers_dir.exists() else None + + if not session_id or session_id in {".", ".."} or "/" in session_id or "\\" in session_id: + return None + + sessions_root = Path(system_config.auto_sessions_base_dir).resolve() + session_dir = (sessions_root / session_id).resolve() + if session_dir.parent != sessions_root: + return None + + papers_dir = session_dir / "papers" + return papers_dir if papers_dir.exists() else None + + async def _get_history_user_prompt(self, session_id: str) -> str: + """Read the user prompt associated with a legacy or session-based paper history entry.""" + if session_id == "legacy": + metadata_path = Path(system_config.auto_research_metadata_file) + default_prompt = "Legacy research session" + else: + metadata_path = Path(system_config.auto_sessions_base_dir) / session_id / "session_metadata.json" + default_prompt = "Unknown research question" + + if not metadata_path.exists(): + return default_prompt + + try: + async with aiofiles.open(metadata_path, 'r', encoding='utf-8') as f: + metadata = json.loads(await f.read()) + return ( + metadata.get("user_prompt") + or metadata.get("user_research_prompt") + or default_prompt + ) + except Exception as e: + logger.warning(f"Failed to read history prompt for session {session_id}: {e}") + return default_prompt + + @staticmethod + def _calculate_critique_average(critique: Any) -> Optional[float]: + """Calculate the display average for a critique record.""" + if not critique: + return None + + return round( + (critique.novelty_rating + critique.correctness_rating + critique.impact_rating) / 3.0, + 1 + ) + + async def _list_history_papers_from_directory(self, papers_dir: Path, session_id: str) -> List[Dict[str, Any]]: + """List complete, non-archived papers from one legacy/session papers directory.""" + from backend.shared.critique_memory import get_latest_critique + + scoped_library = self._build_scoped_library(papers_dir) + user_prompt = await 
self._get_history_user_prompt(session_id) + papers = await scoped_library.get_all_papers(validate_completeness=True) + + history_papers = [] + for metadata in papers: + if metadata.status != "complete": + continue + + latest_critique = await get_latest_critique( + paper_type="autonomous_paper", + paper_id=metadata.paper_id, + base_path=str(papers_dir) + ) + + history_papers.append({ + "history_id": f"{session_id}:{metadata.paper_id}", + "session_id": session_id, + "paper_id": metadata.paper_id, + "title": metadata.title, + "abstract": metadata.abstract, + "word_count": metadata.word_count, + "source_brainstorm_ids": metadata.source_brainstorm_ids, + "referenced_papers": metadata.referenced_papers, + "status": metadata.status, + "created_at": metadata.created_at.isoformat() if metadata.created_at else None, + "model_usage": metadata.model_usage, + "user_prompt": user_prompt, + "critique_avg": self._calculate_critique_average(latest_critique), + }) + + return history_papers + + async def list_history_papers(self) -> List[Dict[str, Any]]: + """List all complete, non-archived Stage 2 papers from legacy and session storage.""" + history_papers: List[Dict[str, Any]] = [] + + legacy_papers_dir = Path(system_config.auto_papers_dir) + if legacy_papers_dir.exists(): + history_papers.extend( + await self._list_history_papers_from_directory(legacy_papers_dir, "legacy") + ) + + sessions_dir = Path(system_config.auto_sessions_base_dir) + if sessions_dir.exists(): + for session_dir in sorted((p for p in sessions_dir.iterdir() if p.is_dir()), reverse=True): + papers_dir = session_dir / "papers" + if not papers_dir.exists(): + continue + + history_papers.extend( + await self._list_history_papers_from_directory(papers_dir, session_dir.name) + ) + + history_papers.sort(key=lambda paper: paper.get("created_at") or "", reverse=True) + return history_papers + + async def get_history_paper(self, session_id: str, paper_id: str) -> Optional[Dict[str, Any]]: + """Get one complete, 
non-archived Stage 2 paper from legacy/session history.""" + from backend.shared.critique_memory import get_latest_critique + + papers_dir = self.get_history_papers_dir(session_id) + if papers_dir is None: + return None + + scoped_library = self._build_scoped_library(papers_dir) + metadata = await scoped_library.get_metadata(paper_id) + if metadata is None or metadata.status != "complete": + return None + + if not await scoped_library.is_paper_complete(paper_id): + return None + + content = await scoped_library.get_paper_content(paper_id) + outline = await scoped_library.get_outline(paper_id) + latest_critique = await get_latest_critique( + paper_type="autonomous_paper", + paper_id=paper_id, + base_path=str(papers_dir) + ) + + return { + "history_id": f"{session_id}:{paper_id}", + "session_id": session_id, + "paper_id": metadata.paper_id, + "title": metadata.title, + "abstract": metadata.abstract, + "word_count": metadata.word_count, + "source_brainstorm_ids": metadata.source_brainstorm_ids, + "referenced_papers": metadata.referenced_papers, + "status": metadata.status, + "created_at": metadata.created_at.isoformat() if metadata.created_at else None, + "model_usage": metadata.model_usage, + "user_prompt": await self._get_history_user_prompt(session_id), + "critique_avg": self._calculate_critique_average(latest_critique), + "content": content, + "outline": outline, + } # ======================================================================== # CONTENT VALIDATION diff --git a/backend/autonomous/memory/paper_model_tracker.py b/backend/autonomous/memory/paper_model_tracker.py index e2eb311..bd331d0 100644 --- a/backend/autonomous/memory/paper_model_tracker.py +++ b/backend/autonomous/memory/paper_model_tracker.py @@ -177,11 +177,14 @@ def generate_author_attribution( "=" * 80, "AUTONOMOUS AI SOLUTION", "", - "Disclaimer: This content is for informational purposes only. 
This paper was " - "autonomously generated with the novelty-seeking MOTO harness without peer review or user " - "oversight beyond the original prompt. AI-generated content may contain " - "fabricated or unverified claims presented with high confidence. All content " - "should be viewed with extreme scrutiny and independently verified before use.", + "Disclaimer: This content is provided for informational and experimental purposes only. " + "This paper was autonomously generated with the novelty-seeking MOTO harness without " + "peer review or user oversight beyond the original prompt. It may contain incorrect, " + "incomplete, misleading, or fabricated claims presented with high confidence. Use of " + "this content is at your own risk. You are solely responsible for reviewing and " + "independently verifying any output before relying on it, and the developers, " + "operators, and contributors are not responsible for errors, omissions, decisions made " + "from this content, or any resulting loss, damage, cost, or liability.", "", f"User's Research Prompt: {display_prompt}", "", diff --git a/backend/autonomous/prompts/__init__.py b/backend/autonomous/prompts/__init__.py index 9ad0c62..d217ce4 100644 --- a/backend/autonomous/prompts/__init__.py +++ b/backend/autonomous/prompts/__init__.py @@ -2,15 +2,21 @@ Autonomous Prompts - System prompts and JSON schemas for autonomous research. 
""" from backend.autonomous.prompts import topic_prompts +from backend.autonomous.prompts import topic_exploration_prompts from backend.autonomous.prompts import completion_prompts from backend.autonomous.prompts import paper_reference_prompts +from backend.autonomous.prompts import paper_title_exploration_prompts from backend.autonomous.prompts import paper_title_prompts from backend.autonomous.prompts import paper_redundancy_prompts +from backend.autonomous.prompts import paper_continuation_prompts __all__ = [ 'topic_prompts', + 'topic_exploration_prompts', 'completion_prompts', 'paper_reference_prompts', + 'paper_title_exploration_prompts', 'paper_title_prompts', - 'paper_redundancy_prompts' + 'paper_redundancy_prompts', + 'paper_continuation_prompts' ] diff --git a/backend/autonomous/prompts/paper_continuation_prompts.py b/backend/autonomous/prompts/paper_continuation_prompts.py new file mode 100644 index 0000000..430f7d9 --- /dev/null +++ b/backend/autonomous/prompts/paper_continuation_prompts.py @@ -0,0 +1,300 @@ +""" +Paper Continuation Prompts - System prompts and JSON schemas for brainstorm +multi-paper continuation decisions. After each paper, the AI decides whether +to write another paper from the same brainstorm (max 3) or move on. +""" +from typing import List, Dict, Any + + +def get_continuation_decision_system_prompt() -> str: + """Get system prompt for brainstorm paper continuation decision.""" + return """You are an autonomous mathematical research agent deciding whether to write another paper from the current brainstorm or move on to a new research topic. Your role is to: + +1. Review the user's high-level research goal +2. Review the current brainstorm topic and its full database of accepted submissions +3. Review ALL papers already written from this brainstorm (titles, abstracts, outlines) +4. 
Decide whether the brainstorm has enough distinct unexplored material for another paper + +⚠️ CRITICAL - INTERNAL CONTENT WARNING ⚠️ + +ALL context provided to you (brainstorm databases, accepted submissions, papers, reference materials, outlines, previous document content) is AI-GENERATED within this research system. This content has NOT been peer-reviewed, published, or verified by external sources. + +YOU MUST TREAT ALL PROVIDED CONTEXT WITH EXTREME SKEPTICISM: +- NEVER assume claims are true because they "sound good" or "fit well" +- NEVER trust information simply because it appears in "accepted submissions" or "papers" +- ALWAYS verify information independently before using or building upon it +- NEVER cite internal documents as authoritative or established sources +- Question and validate every assertion, even if it appears in validated content + +WEB SEARCH STRONGLY ENCOURAGED: +If your model has access to real-time web search capabilities (such as Perplexity Sonar or similar), you are STRONGLY ENCOURAGED to use them to: +- Verify mathematical claims against current published research +- Access recent developments and contemporary mathematical literature +- Cross-reference theorems, proofs, and techniques with authoritative sources +- Supplement analysis with verified external information +- Validate approaches against established mathematical consensus + +The internal context shows what has been explored by AI agents, NOT what has been proven correct. Your role is to generate rigorous, verifiable mathematical content. Use all available resources - internal context as exploration history, your base knowledge for reasoning, and web search (if available) for verification and current information. + +WHEN IN DOUBT: Verify independently. Do not assume. Do not trust unverified internal context as truth. If you have web search, use it. 
+ +--- + +YOUR TASK: +Decide whether the brainstorm database contains enough distinct, unexplored material to warrant writing ANOTHER paper, or whether the user's research goal is better served by moving on to a new brainstorm topic. + +DECISION OPTIONS: +1. WRITE_ANOTHER_PAPER - The brainstorm has significant material that the existing paper(s) did NOT cover, and another paper would meaningfully advance the user's research goal +2. MOVE_ON - The existing paper(s) adequately cover this brainstorm, or a new topic would better serve the user's goal + +WRITE ANOTHER PAPER if: +- The brainstorm database contains substantial material not covered by existing paper(s) +- Another paper would address a meaningfully DIFFERENT angle, perspective, or subset of the brainstorm +- The uncovered material is rich enough for a complete, distinct paper (not just leftover fragments) +- Writing another paper from this brainstorm advances the user's goal MORE than starting a new topic +- The existing paper(s) focused on specific aspects, leaving other important aspects unexplored + +MOVE ON if: +- The existing paper(s) adequately cover the brainstorm's valuable content +- Remaining brainstorm material is insufficient for a distinct full paper +- A new brainstorm topic would better advance the user's research goal +- Another paper would largely duplicate content already in the existing paper(s) +- The brainstorm's unique contributions have been captured + +CRITICAL JSON ESCAPE RULES: +1. Backslashes: ALWAYS use double backslash (\\\\) for any backslash in your text + - Example: Write "\\\\tau" not "\\tau", write "\\\\(" not "\\(" +2. Quotes: Escape double quotes inside strings as \\" +3. Newlines/Tabs: Use \\n for newlines (NOT \\\\n), \\t for tabs (NOT \\\\t) +4. 
LaTeX notation: If your content contains mathematical expressions like \\Delta, \\tau, etc., + you MUST escape the backslash: write "\\\\Delta", "\\\\tau", "\\\\[", "\\\\]" + +Output your decision ONLY as JSON in the required format.""" + + +def get_continuation_decision_json_schema() -> str: + """Get JSON schema for continuation decision.""" + return """REQUIRED JSON FORMAT: +{ + "decision": "write_another_paper | move_on", + "reasoning": "string - Detailed explanation of your assessment" +} + +FIELD REQUIREMENTS: +- decision: MUST be either "write_another_paper" or "move_on" +- reasoning: ALWAYS required - explain what material remains unexplored or why moving on is better + +EXAMPLES: + +Write Another Paper: +{ + "decision": "write_another_paper", + "reasoning": "The brainstorm database contains 22 submissions covering both algebraic and analytic approaches to the Langlands correspondence. Paper 1 focused exclusively on the algebraic side (Galois representations, class field theory). The analytic side (automorphic forms, L-functions, spectral theory) has substantial unexplored material in submissions 8, 12, 14, 17-20 that would form a distinct and valuable second paper." +} + +Move On: +{ + "decision": "move_on", + "reasoning": "The existing paper comprehensively covers the brainstorm's core content on modular forms and their connections to Galois representations. The remaining submissions (3 out of 18) contain supplementary remarks that are too fragmented for a standalone paper. The user's research goal on the Langlands program would be better served by exploring a new avenue such as trace formulas or p-adic methods." +}""" + + +def get_continuation_validator_system_prompt() -> str: + """Get system prompt for validating a continuation decision.""" + return """You are validating a brainstorm continuation decision in an autonomous mathematical research system. Your role is to: + +1. Review the user's high-level research goal +2. 
Review the current brainstorm topic and its database +3. Review all papers already written from this brainstorm +4. Evaluate whether the proposed decision (write another paper vs move on) is optimal + +⚠️ CRITICAL - INTERNAL CONTENT WARNING ⚠️ + +ALL context provided to you (brainstorm databases, accepted submissions, papers, reference materials, outlines, previous document content) is AI-GENERATED within this research system. This content has NOT been peer-reviewed, published, or verified by external sources. + +YOU MUST TREAT ALL PROVIDED CONTEXT WITH EXTREME SKEPTICISM: +- NEVER assume claims are true because they "sound good" or "fit well" +- NEVER trust information simply because it appears in "accepted submissions" or "papers" +- ALWAYS verify information independently before using or building upon it +- NEVER cite internal documents as authoritative or established sources +- Question and validate every assertion, even if it appears in validated content + +WEB SEARCH STRONGLY ENCOURAGED: +If your model has access to real-time web search capabilities (such as Perplexity Sonar or similar), you are STRONGLY ENCOURAGED to use them to verify claims and access current mathematical literature. + +--- + +YOUR TASK: +Validate whether the proposed continuation decision is the best use of research resources. + +ACCEPT the decision if: +1. WRITE_ANOTHER_PAPER: The brainstorm genuinely has enough distinct unexplored material for another paper AND the reasoning correctly identifies what material remains +2. MOVE_ON: The existing papers adequately cover the brainstorm OR a new topic would genuinely better serve the goal AND the reasoning is sound + +REJECT the decision if: +1. WRITE_ANOTHER_PAPER: The brainstorm material is already well-covered and another paper would be redundant +2. WRITE_ANOTHER_PAPER: The "unexplored material" identified is too thin for a full paper +3. MOVE_ON: There is clearly substantial uncovered material that warrants another paper +4. 
MOVE_ON: The reasoning ignores valuable unexplored content in the brainstorm +5. The reasoning is flawed, vague, or contradicts the evidence + +REJECTION FEEDBACK FORMAT: +If rejecting, provide CONCRETE, ACTIONABLE guidance: + +"REJECTION REASON: [Premature Move On|Redundant Paper|Insufficient Material|etc.] + +ISSUE: [What's wrong with the proposed decision] + +BETTER ALTERNATIVE: [What would be the optimal choice given current state] + +EVIDENCE: [Specific brainstorm submissions or paper sections that support your assessment]" + +CRITICAL JSON ESCAPE RULES: +1. Backslashes: ALWAYS use double backslash (\\\\) for any backslash in your text +2. Quotes: Escape double quotes inside strings as \\" +3. Newlines/Tabs: Use \\n for newlines, \\t for tabs + +Output your decision ONLY as JSON in the required format.""" + + +def get_continuation_validator_json_schema() -> str: + """Get JSON schema for continuation validation.""" + return """REQUIRED JSON FORMAT: +{ + "decision": "accept | reject", + "reasoning": "string - Detailed explanation for the decision" +} + +FIELD REQUIREMENTS: +- decision: MUST be either "accept" or "reject" +- reasoning: ALWAYS required - detailed explanation (use structured format if rejecting) + +EXAMPLE (Accept): +{ + "decision": "accept", + "reasoning": "The proposal to write another paper is well-justified. The brainstorm contains substantial analytic content (automorphic forms, L-functions) that paper 1's algebraic focus did not address. This material is rich enough for a distinct second paper." +} + +EXAMPLE (Reject - Use Structured Format): +{ + "decision": "reject", + "reasoning": "REJECTION REASON: Insufficient Material\\n\\nISSUE: The proposal to write another paper claims unexplored material in submissions 15-18, but these submissions largely restate concepts already covered in paper 1's Section III (Main Results).\\n\\nBETTER ALTERNATIVE: Move on to a new brainstorm topic. 
The remaining brainstorm content is supplementary, not substantial enough for a standalone paper.\\n\\nEVIDENCE: Submissions 15-18 discuss Galois representations which paper 1 already covers comprehensively in Sections III and IV." +}""" + + +def build_continuation_decision_prompt( + user_research_prompt: str, + topic_prompt: str, + brainstorm_summary: str, + papers_from_brainstorm: List[Dict[str, Any]], + papers_written_count: int, + rejection_context: str = "" +) -> str: + """ + Build the complete continuation decision prompt. + + Args: + user_research_prompt: The user's high-level research goal + topic_prompt: The brainstorm topic prompt + brainstorm_summary: Full brainstorm database content + papers_from_brainstorm: List of dicts with title, abstract, outline for each paper + papers_written_count: Number of papers already written from this brainstorm + rejection_context: Formatted previous rejection feedback + + Returns: + Complete prompt string + """ + parts = [ + get_continuation_decision_system_prompt(), + "\n---\n", + get_continuation_decision_json_schema(), + "\n---\n", + f"USER RESEARCH GOAL:\n{user_research_prompt}", + "\n---\n", + f"BRAINSTORM TOPIC:\n{topic_prompt}", + "\n---\n", + ] + + parts.append(f"PAPERS WRITTEN FROM THIS BRAINSTORM: {papers_written_count} of 3 maximum\n") + + if papers_from_brainstorm: + parts.append("\nEXISTING PAPERS FROM THIS BRAINSTORM:\n") + for i, p in enumerate(papers_from_brainstorm, 1): + parts.append(f"\n--- Paper {i} ---") + parts.append(f"\nTitle: {p.get('title', 'N/A')}") + parts.append(f"\nAbstract: {p.get('abstract', 'N/A')}") + if p.get('outline'): + parts.append(f"\nOutline:\n{p.get('outline')}") + parts.append("\n---\n") + else: + parts.append("\nEXISTING PAPERS FROM THIS BRAINSTORM: None\n---\n") + + parts.append(f"BRAINSTORM DATABASE (all accepted submissions):\n{brainstorm_summary}") + parts.append("\n---\n") + + if rejection_context: + parts.append(f"IMPORTANT - YOUR PREVIOUS DECISION WAS 
REJECTED:\n{rejection_context}\n---\n") + + parts.append("Now decide whether to write another paper or move on, and provide your decision as JSON:") + + return "".join(parts) + + +def build_continuation_validation_prompt( + user_research_prompt: str, + topic_prompt: str, + brainstorm_summary: str, + papers_from_brainstorm: List[Dict[str, Any]], + papers_written_count: int, + proposed_decision: Dict[str, Any] +) -> str: + """ + Build the complete continuation validation prompt. + + Args: + user_research_prompt: The user's high-level research goal + topic_prompt: The brainstorm topic prompt + brainstorm_summary: Full brainstorm database content + papers_from_brainstorm: List of dicts with title, abstract, outline for each paper + papers_written_count: Number of papers already written from this brainstorm + proposed_decision: The continuation decision to validate + + Returns: + Complete prompt string + """ + parts = [ + get_continuation_validator_system_prompt(), + "\n---\n", + get_continuation_validator_json_schema(), + "\n---\n", + f"USER RESEARCH GOAL:\n{user_research_prompt}", + "\n---\n", + f"BRAINSTORM TOPIC:\n{topic_prompt}", + "\n---\n", + ] + + parts.append(f"PAPERS WRITTEN FROM THIS BRAINSTORM: {papers_written_count} of 3 maximum\n") + + if papers_from_brainstorm: + parts.append("\nEXISTING PAPERS FROM THIS BRAINSTORM:\n") + for i, p in enumerate(papers_from_brainstorm, 1): + parts.append(f"\n--- Paper {i} ---") + parts.append(f"\nTitle: {p.get('title', 'N/A')}") + parts.append(f"\nAbstract: {p.get('abstract', 'N/A')[:500]}...") + if p.get('outline'): + parts.append(f"\nOutline:\n{p.get('outline')}") + parts.append("\n---\n") + else: + parts.append("\nEXISTING PAPERS FROM THIS BRAINSTORM: None\n---\n") + + parts.append(f"BRAINSTORM DATABASE (all accepted submissions):\n{brainstorm_summary}") + parts.append("\n---\n") + + parts.append("PROPOSED CONTINUATION DECISION:\n") + parts.append(f"Decision: {proposed_decision.get('decision', 'Unknown')}") + 
parts.append(f"\nReasoning: {proposed_decision.get('reasoning', 'N/A')}") + parts.append("\n---\n") + + parts.append("Validate this continuation decision and provide your decision as JSON:") + + return "".join(parts) diff --git a/backend/autonomous/prompts/paper_title_exploration_prompts.py b/backend/autonomous/prompts/paper_title_exploration_prompts.py new file mode 100644 index 0000000..9193ec0 --- /dev/null +++ b/backend/autonomous/prompts/paper_title_exploration_prompts.py @@ -0,0 +1,87 @@ +""" +Paper Title Exploration Prompts - Builds the aggregator-compatible user prompt for +the paper title exploration phase that collects 5 validated candidate titles +before final title selection. + +The exploration phase reuses the full Part 1 aggregator infrastructure (parallel +submitters, batch validation up to 3, queue management) by framing the task as +a standard aggregation with a specially crafted user prompt. +""" +from typing import List, Dict, Any, Optional + + +def build_title_exploration_user_prompt( + user_research_prompt: str, + topic_prompt: str, + brainstorm_summary: str, + existing_papers_from_brainstorm: List[Dict[str, Any]], + reference_papers: Optional[List[Dict[str, Any]]] = None +) -> str: + """ + Build the user prompt passed to the aggregator for paper title exploration. + + This prompt frames the aggregation task so that submitters generate candidate + paper titles and the validator checks quality plus diversity. The standard + aggregator submitter and validator system prompts handle the rest. 
+ + Args: + user_research_prompt: User's high-level research goal + topic_prompt: Brainstorm topic, Tier 3 certainty context, or chapter brief + brainstorm_summary: Summary of the source material the paper will be built from + existing_papers_from_brainstorm: Related completed papers that titles must not duplicate + reference_papers: Optional reference papers informing this paper or chapter + """ + parts = [] + + parts.append("=== PAPER TITLE EXPLORATION PHASE ===\n") + parts.append("You are in a PAPER TITLE EXPLORATION phase. You are NOT writing the paper itself.\n") + parts.append("Instead, your task is to propose ONE CANDIDATE PAPER TITLE per submission.") + parts.append("The system will collect 5 validated candidate titles before a later final") + parts.append("selection chooses the actual title.\n") + parts.append("Each submission should contain:") + parts.append("- One candidate paper title") + parts.append("- Brief reasoning for why the title is strong, accurate, and distinct\n") + parts.append("The validator will check QUALITY and DIVERSITY:") + parts.append("- Weak, vague, or generic titles will be rejected") + parts.append("- Titles too similar to already-accepted candidates will be rejected") + parts.append("- Titles too similar to already-completed related papers should be rejected") + parts.append("- The goal is to map multiple plausible title directions before committing\n") + parts.append("WHAT MAKES A GOOD CANDIDATE TITLE:") + parts.append("- Accurately captures the paper's likely mathematical content") + parts.append("- Specific enough to communicate the core focus") + parts.append("- Professional and suitable for a mathematical research paper") + parts.append("- Distinct from already-accepted candidate titles") + parts.append("- Distinct from related completed papers listed below") + parts.append("- If this is a final-answer or chapter paper, the title should match that role directly\n") + parts.append("DIVERSITY IS PARAMOUNT:") + parts.append("Do 
not submit near-duplicates, minor rephrasings, or cosmetic variants.") + parts.append("Propose genuinely different title framings, emphases, or structural approaches.\n") + parts.append("FORMAT YOUR SUBMISSION AS:") + parts.append("State the candidate title clearly, then explain why it is valuable and") + parts.append("how it differs from existing accepted candidates or related papers.\n") + + parts.append(f"USER RESEARCH GOAL:\n{user_research_prompt}\n") + parts.append(f"PAPER CONTEXT / SOURCE TOPIC:\n{topic_prompt}\n") + parts.append(f"SOURCE MATERIAL SUMMARY:\n{brainstorm_summary}\n") + + if existing_papers_from_brainstorm: + parts.append("\nEXISTING RELATED PAPERS (do not duplicate these title directions):") + for paper in existing_papers_from_brainstorm: + abstract = paper.get("abstract", "N/A") + if isinstance(abstract, str) and len(abstract) > 300: + abstract = abstract[:300] + "..." + parts.append(f" - {paper.get('paper_id', 'N/A')}: \"{paper.get('title', 'N/A')}\"") + parts.append(f" Abstract: {abstract}") + else: + parts.append("\nEXISTING RELATED PAPERS: None") + + if reference_papers: + parts.append("\nREFERENCE PAPERS INFORMING THIS TITLE:") + for paper in reference_papers: + abstract = paper.get("abstract", "N/A") + if isinstance(abstract, str) and len(abstract) > 220: + abstract = abstract[:220] + "..." 
+ parts.append(f" - {paper.get('paper_id', 'N/A')}: \"{paper.get('title', 'N/A')}\"") + parts.append(f" Abstract: {abstract}") + + return "\n".join(parts) diff --git a/backend/autonomous/prompts/paper_title_prompts.py b/backend/autonomous/prompts/paper_title_prompts.py index c10a001..9f8b2d2 100644 --- a/backend/autonomous/prompts/paper_title_prompts.py +++ b/backend/autonomous/prompts/paper_title_prompts.py @@ -187,7 +187,8 @@ def build_paper_title_prompt( brainstorm_summary: str, existing_papers_from_brainstorm: List[Dict[str, Any]], reference_papers: List[Dict[str, Any]] = None, - rejection_feedback: str = "" + rejection_feedback: str = "", + candidate_titles: str = "" ) -> str: """ Build the paper title selection prompt. @@ -199,6 +200,7 @@ def build_paper_title_prompt( existing_papers_from_brainstorm: Papers already created from this brainstorm reference_papers: Selected reference papers (if any) rejection_feedback: Accumulated rejection reasons from previous attempts (if any) + candidate_titles: Pre-validated candidate titles from exploration phase (if any) Returns: Complete prompt string @@ -233,6 +235,17 @@ def build_paper_title_prompt( parts.append(f"\n- {p.get('title', 'N/A')}") parts.append("\n---\n") + # Inject validated candidate titles from exploration phase + if candidate_titles: + parts.append( + "PRE-VALIDATED CANDIDATE TITLES (from exploration phase):\n" + "The following candidate titles have been validated by the system. 
You may:\n" + "- Select one of these candidates directly\n" + "- Synthesize or improve upon a candidate\n" + "- Propose a NEW title if clearly better — but you MUST justify why it is superior\n\n" + f"{candidate_titles}\n---\n" + ) + # Inject rejection feedback so the model learns from previous failed attempts if rejection_feedback: parts.append( diff --git a/backend/autonomous/prompts/topic_exploration_prompts.py b/backend/autonomous/prompts/topic_exploration_prompts.py new file mode 100644 index 0000000..f367fe6 --- /dev/null +++ b/backend/autonomous/prompts/topic_exploration_prompts.py @@ -0,0 +1,78 @@ +""" +Topic Exploration Prompts - Builds the aggregator-compatible user prompt for the +topic exploration phase that collects 5 validated candidate brainstorm questions +before topic selection. + +The exploration phase reuses the full Part 1 aggregator infrastructure (parallel +submitters, batch validation up to 3, queue management) by framing the task as +a standard aggregation with a specially crafted user prompt. +""" +from typing import List, Dict, Any + + +def build_exploration_user_prompt( + user_research_prompt: str, + brainstorms_summary: List[Dict[str, Any]], + papers_summary: List[Dict[str, Any]] +) -> str: + """ + Build the user prompt passed to the aggregator for topic exploration. + + This prompt frames the aggregation task so that submitters generate candidate + brainstorm questions and the validator checks quality + diversity. The standard + aggregator submitter/validator system prompts handle the rest. + + Args: + user_research_prompt: User's high-level research goal + brainstorms_summary: All existing brainstorms with metadata + papers_summary: All completed papers with title/abstract + """ + parts = [] + + parts.append("=== TOPIC EXPLORATION PHASE ===\n") + parts.append("You are in a TOPIC EXPLORATION phase. 
You are NOT solving a mathematical problem directly.") + parts.append("Instead, your task is to propose CANDIDATE BRAINSTORM QUESTIONS — specific mathematical") + parts.append("avenues worth exploring for the research goal below.\n") + parts.append("Each submission should contain ONE candidate brainstorm question and reasoning for why") + parts.append("it is a valuable, distinct direction. The validator will check quality and DIVERSITY —") + parts.append("candidates that overlap with already-accepted ones will be REJECTED.\n") + parts.append("WHAT MAKES A GOOD CANDIDATE QUESTION:") + parts.append("- Specific enough to guide focused mathematical exploration (not vague)") + parts.append("- Novel relative to already-accepted candidates and existing brainstorms") + parts.append("- Relevant to the research goal below") + parts.append("- Opens a DISTINCT mathematical direction not already represented") + parts.append("- Grounded in established mathematical concepts") + parts.append("- Actionable — a brainstorm session could produce meaningful insights from it\n") + parts.append("DIVERSITY IS PARAMOUNT:") + parts.append("Your candidate MUST be SUBSTANTIVELY DIFFERENT from already-accepted candidates.") + parts.append("The goal is to map the exploration landscape BROADLY before committing to a direction.") + parts.append("Do not propose variations of existing candidates — propose genuinely different avenues.\n") + parts.append("FORMAT YOUR SUBMISSION AS:") + parts.append("State the candidate brainstorm question clearly, then explain why it is valuable and") + parts.append("distinct from any existing candidates.\n") + + parts.append(f"RESEARCH GOAL:\n{user_research_prompt}\n") + + # Existing brainstorms + if brainstorms_summary: + parts.append("\nEXISTING BRAINSTORM TOPICS (already explored or in progress):") + for b in brainstorms_summary: + parts.append(f" - {b.get('topic_id', 'N/A')}: {b.get('topic_prompt', 'N/A')} " + f"(status: {b.get('status', 'N/A')}, submissions: 
{b.get('submission_count', 0)}, " + f"papers: {b.get('papers_generated', 0)})") + else: + parts.append("\nEXISTING BRAINSTORM TOPICS: None yet") + + # Existing papers + if papers_summary: + parts.append("\nCOMPLETED PAPERS:") + for p in papers_summary: + abstract = p.get('abstract', 'N/A') + if len(abstract) > 300: + abstract = abstract[:300] + "..." + parts.append(f" - {p.get('paper_id', 'N/A')}: \"{p.get('title', 'N/A')}\"") + parts.append(f" Abstract: {abstract}") + else: + parts.append("\nCOMPLETED PAPERS: None yet") + + return "\n".join(parts) diff --git a/backend/autonomous/prompts/topic_prompts.py b/backend/autonomous/prompts/topic_prompts.py index 9b31989..69b3223 100644 --- a/backend/autonomous/prompts/topic_prompts.py +++ b/backend/autonomous/prompts/topic_prompts.py @@ -227,7 +227,8 @@ def build_topic_selection_prompt( user_research_prompt: str, brainstorms_summary: List[Dict[str, Any]], papers_summary: List[Dict[str, Any]], - rejection_context: str = "" + rejection_context: str = "", + candidate_questions: str = "" ) -> str: """ Build the complete topic selection prompt with context. @@ -237,6 +238,7 @@ def build_topic_selection_prompt( brainstorms_summary: List of all brainstorms with metadata papers_summary: List of all papers with title, abstract, word count rejection_context: Formatted previous rejection feedback + candidate_questions: Formatted candidate questions from topic exploration phase Returns: Complete prompt string @@ -250,6 +252,23 @@ def build_topic_selection_prompt( "\n---\n" ] + # Add candidate questions from topic exploration (if available) + if candidate_questions: + parts.append(f"""TOPIC EXPLORATION RESULTS: +The following candidate brainstorm questions were brainstormed and validated for quality +and diversity BEFORE this topic selection. Use them to make an informed strategic decision. 
+ +You may: +- Select one of these candidates directly as your topic (action: new_topic, topic_prompt: the candidate question) +- Combine or synthesize multiple candidates into a stronger question +- Continue an existing brainstorm if the candidates reveal it is worth continuing +- Combine existing brainstorms if the candidates reveal connections +- Propose something entirely new if the candidates missed a critical avenue + +{candidate_questions} +""") + parts.append("\n---\n") + # Add brainstorms summary if brainstorms_summary: parts.append("EXISTING BRAINSTORM TOPICS:\n") diff --git a/backend/autonomous/validation/paper_redundancy_checker.py b/backend/autonomous/validation/paper_redundancy_checker.py index d95987c..408294f 100644 --- a/backend/autonomous/validation/paper_redundancy_checker.py +++ b/backend/autonomous/validation/paper_redundancy_checker.py @@ -1,6 +1,10 @@ """ Paper Redundancy Checker - Reviews paper library for redundancy. Runs every 3 completed papers to maintain library quality. + +NO RAG BY DESIGN: Redundancy is assessed at the abstract/title level — comparing +high-level paper topics to find overlap. Full paper content is not needed to detect +whether two papers cover the same ground. All inputs are compact metadata summaries. 
""" import asyncio import json diff --git a/backend/compiler/agents/critique_submitter.py b/backend/compiler/agents/critique_submitter.py index 3435cbb..45b74f0 100644 --- a/backend/compiler/agents/critique_submitter.py +++ b/backend/compiler/agents/critique_submitter.py @@ -217,6 +217,11 @@ async def submit_critique( except FreeModelExhaustedError: raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise + logger.error(f"Error generating critique: {e}", exc_info=True) + return None except Exception as e: logger.error(f"Error generating critique: {e}", exc_info=True) return None @@ -353,6 +358,11 @@ async def submit_rewrite_decision( except FreeModelExhaustedError: raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise + logger.error(f"Error generating rewrite decision: {e}", exc_info=True) + return None except Exception as e: logger.error(f"Error generating rewrite decision: {e}", exc_info=True) return None @@ -365,6 +375,7 @@ async def submit_iterative_edit( current_outline: str, critique_feedback: str, edits_applied: List[Dict], + reference_papers: Optional[str] = None, accumulated_history: Optional[str] = None ) -> Optional[Dict]: """ @@ -380,6 +391,7 @@ async def submit_iterative_edit( current_outline: Paper outline critique_feedback: All accepted critiques from this revision cycle edits_applied: List of edits already applied in this iteration + reference_papers: Optional reference paper content accumulated_history: Optional accumulated critique history from previous failed versions Returns: @@ -401,6 +413,7 @@ async def submit_iterative_edit( current_outline=current_outline, critique_feedback=critique_feedback, edits_applied=edits_applied, + reference_papers=reference_papers, accumulated_critique_history=accumulated_history or "" ) @@ -488,6 +501,11 @@ async def submit_iterative_edit( except FreeModelExhaustedError: raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise 
+ logger.error(f"Error generating iterative edit: {e}", exc_info=True) + return None except Exception as e: logger.error(f"Error generating iterative edit: {e}", exc_info=True) return None diff --git a/backend/compiler/agents/high_context_submitter.py b/backend/compiler/agents/high_context_submitter.py index 9f4c1b6..b7c5f10 100644 --- a/backend/compiler/agents/high_context_submitter.py +++ b/backend/compiler/agents/high_context_submitter.py @@ -293,12 +293,12 @@ async def submit_outline_update(self) -> Optional[CompilerSubmission]: logger.info(f"Paper stripped: {len(current_paper)} chars → {len(paper_for_llm)} chars (markers removed)") # Retrieve aggregator database evidence + # Exclude outline and paper (both direct-injected in outline_update mode) logger.info("Retrieving aggregator database evidence via RAG...") - # Use just the user prompt - the outline is direct-injected anyway - # Truncating to 500 chars loses important context context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt, - mode="outline_update" + mode="outline_update", + exclude_sources=["compiler_outline.txt", "compiler_paper.txt"] ) logger.info(f"RAG retrieval complete: {len(context_pack.text)} chars retrieved") @@ -424,7 +424,9 @@ async def submit_construction( section_phase: Optional[str] = None, rejection_feedback: Optional[str] = None, critique_feedback: Optional[str] = None, - pre_critique_paper: Optional[str] = None + pre_critique_paper: Optional[str] = None, + brainstorm_content: Optional[str] = None, + brainstorm_source_name: Optional[str] = None ) -> Optional[CompilerSubmission]: """ Submit next paper construction portion. 
@@ -436,6 +438,8 @@ async def submit_construction( rejection_feedback: Feedback from a previous rejection to guide the model (e.g., "Introduction not found in document") critique_feedback: Accepted critique feedback from peer review (for body rewrites only) pre_critique_paper: Paper state before critique phase (for body rewrites - shows what failed) + brainstorm_content: Full brainstorm database with submission numbers (for retroactive corrections) + brainstorm_source_name: RAG source name for brainstorm (e.g., "brainstorm_abc123.txt") to exclude from retrieval Returns: CompilerSubmission for construction @@ -456,7 +460,32 @@ async def submit_construction( paper_for_llm = _strip_paper_markers_for_llm(current_paper) logger.info(f"Paper stripped: {len(current_paper)} chars → {len(paper_for_llm)} chars (markers removed)") + # Calculate RAG budget accounting for brainstorm content (prevents context overflow) + max_allowed_tokens = rag_config.get_available_input_tokens( + system_config.compiler_high_context_context_window, + system_config.compiler_high_context_max_output_tokens + ) + outline_tokens = count_tokens(current_outline) + paper_tokens = count_tokens(paper_for_llm) if paper_for_llm else 0 + brainstorm_tokens = count_tokens(brainstorm_content) if brainstorm_content else 0 + system_overhead = 5000 # system prompt, JSON schema, headers, separators, rejection history + + reserved_tokens = outline_tokens + paper_tokens + brainstorm_tokens + system_overhead + rag_budget = max(5000, max_allowed_tokens - reserved_tokens) + + if brainstorm_content and brainstorm_tokens > 0: + logger.info( + f"Context budget: max={max_allowed_tokens}, outline={outline_tokens}, " + f"paper={paper_tokens}, brainstorm={brainstorm_tokens}, overhead={system_overhead}, " + f"rag_budget={rag_budget}" + ) + # Retrieve aggregator database evidence + # Exclude sources already direct-injected to prevent token waste + exclude_sources = ["compiler_outline.txt", "compiler_paper.txt"] + if 
brainstorm_source_name: + exclude_sources.append(brainstorm_source_name) + logger.info("Retrieving aggregator database evidence via RAG...") query = self.user_prompt if not is_first_portion and paper_for_llm: @@ -465,7 +494,9 @@ async def submit_construction( context_pack = await compiler_rag_manager.retrieve_for_mode( query=query, - mode="construction" + mode="construction", + max_tokens=rag_budget, + exclude_sources=exclude_sources ) logger.info(f"RAG retrieval complete: {len(context_pack.text)} chars retrieved") @@ -481,7 +512,8 @@ async def submit_construction( is_first_portion=is_first_portion, rejection_feedback=rejection_feedback, critique_feedback=critique_feedback, - pre_critique_paper=pre_critique_paper + pre_critique_paper=pre_critique_paper, + brainstorm_content=brainstorm_content ) elif section_phase == "conclusion": prompt = await build_conclusion_construction_prompt( @@ -489,7 +521,8 @@ async def submit_construction( current_outline=current_outline, current_paper=paper_for_llm, rag_evidence=context_pack.text, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content ) elif section_phase == "introduction": prompt = await build_introduction_construction_prompt( @@ -497,7 +530,8 @@ async def submit_construction( current_outline=current_outline, current_paper=paper_for_llm, rag_evidence=context_pack.text, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content ) elif section_phase == "abstract": prompt = await build_abstract_construction_prompt( @@ -505,7 +539,8 @@ async def submit_construction( current_outline=current_outline, current_paper=paper_for_llm, rag_evidence=context_pack.text, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content ) else: # Fallback to generic prompt for backward compatibility @@ -522,16 +557,18 @@ async def submit_construction( ) 
logger.info(f"Prompt built: {len(prompt)} chars") - # Validate prompt size + # Validate prompt size (max_allowed_tokens already calculated above for RAG budget) actual_prompt_tokens = count_tokens(prompt) - max_allowed_tokens = rag_config.get_available_input_tokens(system_config.compiler_high_context_context_window, system_config.compiler_high_context_max_output_tokens) if actual_prompt_tokens > max_allowed_tokens: logger.error( f"construction: Assembled prompt ({actual_prompt_tokens} tokens) exceeds context window " f"({max_allowed_tokens} tokens after safety margin). This indicates a context allocation bug." ) - return None # Return None to skip this submission + raise ValueError( + f"construction: Prompt too large ({actual_prompt_tokens} tokens > {max_allowed_tokens} max). " + f"Brainstorm={brainstorm_tokens} tokens, outline={outline_tokens}, paper={paper_tokens}, overhead={system_overhead}." + ) logger.debug(f"construction prompt: {actual_prompt_tokens} tokens (max: {max_allowed_tokens})") @@ -647,6 +684,21 @@ async def submit_construction( metadata={"coverage": context_pack.coverage, "is_first": is_first_portion, "phase": section_phase} ) + # Parse optional brainstorm retroactive operation + brainstorm_op_data = data.get("brainstorm_operation") + if brainstorm_op_data and isinstance(brainstorm_op_data, dict): + try: + from backend.shared.models import BrainstormRetroactiveOperation + submission.brainstorm_operation = BrainstormRetroactiveOperation( + action=brainstorm_op_data.get("action", ""), + submission_number=brainstorm_op_data.get("submission_number"), + new_content=brainstorm_op_data.get("new_content", ""), + reasoning=brainstorm_op_data.get("reasoning", "") + ) + logger.info(f"Brainstorm retroactive operation parsed: {submission.brainstorm_operation.action}") + except Exception as e: + logger.warning(f"Failed to parse brainstorm_operation, ignoring: {e}") + # Notify task completed successfully if self.task_tracking_callback: 
self.task_tracking_callback("completed", task_id) @@ -656,9 +708,17 @@ async def submit_construction( except FreeModelExhaustedError: raise + except ValueError: + raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise + logger.error(f"Failed to generate construction submission: {e}", exc_info=True) + if self.task_tracking_callback and 'task_id' in dir(): + self.task_tracking_callback("completed", task_id) + return None except Exception as e: logger.error(f"Failed to generate construction submission: {e}", exc_info=True) - # Notify task completed (failed but still completed) if self.task_tracking_callback and 'task_id' in dir(): self.task_tracking_callback("completed", task_id) return None @@ -666,7 +726,11 @@ async def submit_construction( async def submit_review(self) -> Optional[CompilerSubmission]: """ Submit paper review (or no-op if no edit needed). - Note: Aggregator DB is NOT in context for this mode. + + NO RAG BY DESIGN: Review mode evaluates the paper on its own merits — + checking for errors, coherence issues, and improvements against the outline. + No aggregator DB, brainstorm, or reference papers in context. The reviewer + must judge the paper as a standalone document without external source bias. Returns: CompilerSubmission if edit needed, None otherwise @@ -703,7 +767,7 @@ async def submit_review(self) -> Optional[CompilerSubmission]: f"review: Assembled prompt ({actual_prompt_tokens} tokens) exceeds context window " f"({max_allowed_tokens} tokens after safety margin). This indicates a context allocation bug." 
) - return None # Return None to skip this submission + raise ValueError(f"review: Prompt too large ({actual_prompt_tokens} tokens > {max_allowed_tokens} max)") logger.debug(f"review prompt: {actual_prompt_tokens} tokens (max: {max_allowed_tokens})") @@ -765,8 +829,8 @@ async def submit_review(self) -> Optional[CompilerSubmission]: logger.info("Paper review: no edit needed") return None - # Check if this is a miniscule edit - is_miniscule = "miniscule" in data.get("reasoning", "").lower() or "minor" in data.get("reasoning", "").lower() + # Check if this is a minuscule edit + is_minuscule = "minuscule" in data.get("reasoning", "").lower() or "minor" in data.get("reasoning", "").lower() # Create submission # Use new_string as content for logging @@ -780,21 +844,29 @@ async def submit_review(self) -> Optional[CompilerSubmission]: old_string=_normalize_string_field(data.get("old_string", "")), new_string=new_string_content, # Already normalized above reasoning=data.get("reasoning", ""), - metadata={"is_miniscule": is_miniscule} + metadata={"is_minuscule": is_minuscule} ) # Notify task completed successfully if self.task_tracking_callback: self.task_tracking_callback("completed", task_id) - logger.info(f"Review submission generated: {submission.submission_id} (miniscule={is_miniscule})") + logger.info(f"Review submission generated: {submission.submission_id} (minuscule={is_minuscule})") return submission except FreeModelExhaustedError: raise + except ValueError: + raise + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise + logger.error(f"Failed to generate review submission: {e}", exc_info=True) + if self.task_tracking_callback and 'task_id' in dir(): + self.task_tracking_callback("completed", task_id) + return None except Exception as e: logger.error(f"Failed to generate review submission: {e}", exc_info=True) - # Notify task completed (failed but still completed) if self.task_tracking_callback and 'task_id' in dir(): 
self.task_tracking_callback("completed", task_id) return None # Don't crash workflow on review failure diff --git a/backend/compiler/agents/high_param_submitter.py b/backend/compiler/agents/high_param_submitter.py index efaa03e..2d9b630 100644 --- a/backend/compiler/agents/high_param_submitter.py +++ b/backend/compiler/agents/high_param_submitter.py @@ -169,11 +169,14 @@ async def _step1_planning(self) -> Optional[dict]: ) # Try initial RAG retrieval - may overflow if outline + system prompts are large + # Exclude outline (always direct-injected in rigor mode) + rigor_exclude = ["compiler_outline.txt"] try: logger.info("Step 1: Retrieving relevant paper sections via RAG...") context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], - mode="rigor" + mode="rigor", + exclude_sources=rigor_exclude ) logger.info(f"Step 1: RAG retrieval complete - {len(context_pack.text)} chars") @@ -217,7 +220,8 @@ async def _step1_planning(self) -> Optional[dict]: context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], mode="rigor", - max_tokens=remaining_budget + max_tokens=remaining_budget, + exclude_sources=rigor_exclude ) prompt = await build_rigor_planning_prompt( @@ -272,6 +276,13 @@ async def _step1_planning(self) -> Optional[dict]: return data + except RuntimeError as e: + if "credits exhausted" in str(e).lower(): + raise + logger.error(f"Step 1: JSON parse failed - {e}") + if self.task_tracking_callback: + self.task_tracking_callback("completed", task_id) + return None except Exception as e: logger.error(f"Step 1: JSON parse failed - {e}") if self.task_tracking_callback: @@ -308,11 +319,14 @@ async def _step2_standard_execution( ) # Try RAG retrieval + # Exclude outline (always direct-injected in rigor mode) + rigor_exclude = ["compiler_outline.txt"] try: logger.info("Step 2: Retrieving paper sections via RAG...") context_pack = await 
compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], - mode="rigor" + mode="rigor", + exclude_sources=rigor_exclude ) # Build execution prompt @@ -357,7 +371,8 @@ async def _step2_standard_execution( context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], mode="rigor", - max_tokens=remaining_budget + max_tokens=remaining_budget, + exclude_sources=rigor_exclude ) prompt = await build_rigor_execution_prompt( @@ -502,11 +517,14 @@ async def _step2_wolfram_execution( ) # Try RAG retrieval + # Exclude outline (always direct-injected in rigor mode) + rigor_exclude = ["compiler_outline.txt"] try: logger.info("Step 2 (Wolfram): Retrieving paper sections via RAG...") context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], - mode="rigor" + mode="rigor", + exclude_sources=rigor_exclude ) # Build Wolfram execution prompt @@ -553,7 +571,8 @@ async def _step2_wolfram_execution( context_pack = await compiler_rag_manager.retrieve_for_mode( query=self.user_prompt + " " + current_paper[-1000:], mode="rigor", - max_tokens=remaining_budget + max_tokens=remaining_budget, + exclude_sources=rigor_exclude ) prompt = await build_rigor_wolfram_execution_prompt( diff --git a/backend/compiler/core/compiler_coordinator.py b/backend/compiler/core/compiler_coordinator.py index 01ebf66..6224e27 100644 --- a/backend/compiler/core/compiler_coordinator.py +++ b/backend/compiler/core/compiler_coordinator.py @@ -18,6 +18,7 @@ from backend.shared.api_client_manager import api_client_manager from backend.shared.openrouter_client import FreeModelExhaustedError from backend.shared.json_parser import parse_json +from backend.shared.utils import count_tokens from backend.compiler.agents.high_context_submitter import HighContextSubmitter from backend.compiler.agents.high_param_submitter import HighParamSubmitter from 
backend.compiler.agents.critique_submitter import CritiqueSubmitterAgent @@ -70,7 +71,7 @@ def __init__(self): self.review_acceptances = 0 self.review_rejections = 0 self.review_declines = 0 - self.miniscule_edit_count = 0 + self.minuscule_edit_count = 0 # Workflow state self.construction_cycle_count = 0 @@ -79,6 +80,8 @@ def __init__(self): # Autonomous mode (for Part 3 integration) self.autonomous_mode = False self.autonomous_section_phase = None # "body", "conclusion", "introduction", "abstract" + self._current_topic_id = None # Set by autonomous coordinator for retroactive brainstorm corrections + self._current_reference_paper_ids: List[str] = [] # Autonomous/Tier 3 references preserved for critique and rewrite context # Critique phase state (post-body peer review) self.critique_submitter = None # CritiqueSubmitterAgent instance @@ -207,7 +210,7 @@ async def initialize( self.review_acceptances = 0 self.review_rejections = 0 self.review_declines = 0 - self.miniscule_edit_count = 0 + self.minuscule_edit_count = 0 self.construction_cycle_count = 0 self.rigor_cycle_active = False self.aggregator_acceptances_last_rag = 0 @@ -957,10 +960,24 @@ async def _initial_paper_loop(self) -> None: try: section_phase = self.autonomous_section_phase if self.autonomous_mode else None + + # Load brainstorm content for first construction too + first_brainstorm_content = None + first_brainstorm_source = None + if self.autonomous_mode and self._current_topic_id: + try: + from backend.autonomous.memory.brainstorm_memory import brainstorm_memory + first_brainstorm_content = await brainstorm_memory.get_database_content(self._current_topic_id) + first_brainstorm_source = f"brainstorm_{self._current_topic_id}.txt" + except Exception: + pass + submission = await self.high_context_submitter.submit_construction( is_first_portion=True, section_phase=section_phase, - rejection_feedback=rejection_feedback # Pass rejection feedback for retry + rejection_feedback=rejection_feedback, + 
brainstorm_content=first_brainstorm_content, + brainstorm_source_name=first_brainstorm_source ) if submission is None: @@ -981,6 +998,23 @@ async def _initial_paper_loop(self) -> None: except FreeModelExhaustedError: raise + except ValueError as e: + logger.error(f"Construction context overflow in initial loop (attempt {attempt}): {e}") + await self._broadcast("compiler_rejection", { + "mode": "construction", + "reasoning": f"Context overflow: {e}" + }) + await compiler_rejection_log.add_rejection( + CompilerValidationResult( + submission_id=str(uuid.uuid4()), + decision="reject", + reasoning=str(e), + summary=str(e)[:750], + validation_stage="internal_error" + ), "construction", "" + ) + await asyncio.sleep(backoff_time) + continue except Exception as e: logger.error(f"Construction submission failed with error (attempt {attempt}): {e}") await self._broadcast("compiler_retry", { @@ -1154,13 +1188,48 @@ async def _submit_and_validate_construction(self, rejection_feedback: Optional[s pre_critique_paper_for_construction = self.pre_critique_paper logger.info("Body construction with critique context (rewrite mode)") - submission = await self.high_context_submitter.submit_construction( - is_first_portion=False, - section_phase=section_phase, - rejection_feedback=rejection_feedback, - critique_feedback=critique_feedback_for_construction, - pre_critique_paper=pre_critique_paper_for_construction - ) + # Load brainstorm content for retroactive corrections (autonomous mode only) + brainstorm_content_for_submitter = None + brainstorm_source_for_submitter = None + if self.autonomous_mode and self._current_topic_id: + try: + from backend.autonomous.memory.brainstorm_memory import brainstorm_memory + brainstorm_content_for_submitter = await brainstorm_memory.get_database_content(self._current_topic_id) + brainstorm_source_for_submitter = f"brainstorm_{self._current_topic_id}.txt" + if brainstorm_content_for_submitter: + logger.info(f"Loaded brainstorm content for retroactive 
corrections: {len(brainstorm_content_for_submitter)} chars") + except Exception as e: + logger.warning(f"Failed to load brainstorm for retroactive corrections: {e}") + + submission = None + try: + submission = await self.high_context_submitter.submit_construction( + is_first_portion=False, + section_phase=section_phase, + rejection_feedback=rejection_feedback, + critique_feedback=critique_feedback_for_construction, + pre_critique_paper=pre_critique_paper_for_construction, + brainstorm_content=brainstorm_content_for_submitter, + brainstorm_source_name=brainstorm_source_for_submitter + ) + except ValueError as e: + logger.error(f"Construction context overflow: {e}") + self.construction_rejections += 1 + overflow_reason = f"Context overflow: {e}" + await compiler_rejection_log.add_rejection( + CompilerValidationResult( + submission_id=str(uuid.uuid4()), + decision="reject", + reasoning=overflow_reason, + summary=overflow_reason[:750], + validation_stage="internal_error" + ), "construction", "" + ) + await self._broadcast("compiler_rejection", { + "mode": "construction", + "reasoning": overflow_reason + }) + return False, overflow_reason if submission is None: logger.info("Construction not needed - paper is complete") @@ -1492,7 +1561,8 @@ def has_real_section_content(section_pattern: str, paper_text: str) -> bool: return True, None logger.info(f"Construction accepted ({word_count} words)") - return True, None + paper_accepted = True + paper_rejection_reason = None else: self.construction_rejections += 1 @@ -1505,7 +1575,88 @@ def has_real_section_content(section_pattern: str, paper_text: str) -> bool: }) logger.info("Construction rejected") - return False, result.reasoning + paper_accepted = False + paper_rejection_reason = result.reasoning + + # ================================================================ + # RETROACTIVE BRAINSTORM OPERATION (independent from paper result) + # ================================================================ + if 
submission.brainstorm_operation and self.autonomous_mode and hasattr(self, '_current_topic_id') and self._current_topic_id: + await self._handle_brainstorm_retroactive_operation(submission.brainstorm_operation) + + return paper_accepted, paper_rejection_reason + + async def _handle_brainstorm_retroactive_operation(self, brainstorm_op) -> None: + """ + Handle a retroactive brainstorm operation independently from the paper operation. + Validates the operation using the compiler validator with brainstorm-only context, + then applies if accepted and refreshes RAG. + """ + from backend.autonomous.memory.brainstorm_memory import brainstorm_memory + + topic_id = self._current_topic_id + logger.info(f"Processing retroactive brainstorm {brainstorm_op.action} for topic {topic_id}") + + try: + brainstorm_content = await brainstorm_memory.get_database_content(topic_id) + if not brainstorm_content: + logger.warning(f"Brainstorm {topic_id} is empty, skipping retroactive operation") + return + + result = await self.validator.validate_brainstorm_operation( + brainstorm_op, brainstorm_content + ) + + if result.decision == "accept": + success = False + action = brainstorm_op.action + + if action == "edit": + success = await brainstorm_memory.edit_submission( + topic_id, brainstorm_op.submission_number, brainstorm_op.new_content + ) + elif action == "delete": + success = await brainstorm_memory.remove_submission( + topic_id, brainstorm_op.submission_number + ) + elif action == "add": + new_num = await brainstorm_memory.add_submission_retroactive( + topic_id, brainstorm_op.new_content + ) + success = new_num is not None + + if success: + logger.info(f"Retroactive brainstorm {action} accepted and applied for topic {topic_id}") + + # Refresh RAG with updated brainstorm content + try: + db_path = brainstorm_memory.get_database_path(topic_id) + from backend.aggregator.core.rag_manager import rag_manager + await rag_manager.add_document( + db_path, + chunk_sizes=[512], + is_user_file=True 
+ ) + logger.info("RAG refreshed with updated brainstorm content") + except Exception as e: + logger.error(f"Failed to refresh RAG after brainstorm {action}: {e}") + + await self._broadcast("brainstorm_retroactive_accepted", { + "action": action, + "topic_id": topic_id, + "submission_number": brainstorm_op.submission_number, + }) + else: + logger.error(f"Retroactive brainstorm {action} was validated but failed to apply") + else: + logger.info(f"Retroactive brainstorm {brainstorm_op.action} rejected: {result.reasoning[:200]}") + await self._broadcast("brainstorm_retroactive_rejected", { + "action": brainstorm_op.action, + "topic_id": topic_id, + "reasoning": result.reasoning[:500], + }) + except Exception as e: + logger.error(f"Error handling retroactive brainstorm operation: {e}") async def _submit_and_validate_outline_update(self) -> bool: """Submit and validate outline update. Returns True if accepted.""" @@ -1617,7 +1768,18 @@ async def _submit_and_validate_review(self) -> bool: """Submit and validate review. 
Returns True if accepted.""" self.current_mode = "review" - submission = await self.high_context_submitter.submit_review() + submission = None + try: + submission = await self.high_context_submitter.submit_review() + except ValueError as e: + logger.error(f"Review context overflow: {e}") + self.review_declines += 1 + await compiler_rejection_log.add_decline("review", f"Context overflow: {e}") + await self._broadcast("compiler_decline", { + "mode": "review", + "reasoning": f"Context overflow: {e}" + }) + return False if submission is None: logger.info("No review edit needed") @@ -1633,9 +1795,9 @@ async def _submit_and_validate_review(self) -> bool: self.total_submissions += 1 - # Check for miniscule edit - if submission.metadata.get("is_miniscule", False): - self.miniscule_edit_count += 1 + # Check for minuscule edit + if submission.metadata.get("is_minuscule", False): + self.minuscule_edit_count += 1 await self._broadcast("compiler_submission", { "mode": "review", @@ -2254,6 +2416,87 @@ async def _start_critique_phase(self) -> None: # Start critique aggregation loop await self._run_critique_aggregation() + + async def _get_reference_papers_context_for_critique( + self, + current_outline: str = "", + current_body: str = "", + aggregator_db: str = "", + critique_feedback: str = "", + pre_critique_paper: str = "", + accumulated_history: str = "" + ) -> Optional[str]: + """ + Prepare reference-paper context for critique/rewrite prompts in autonomous mode. + + This preserves the reference papers selected for the paper instead of + silently dropping them once the critique phase begins. 
+ """ + if not self.autonomous_mode or not self._current_reference_paper_ids: + return None + + try: + from backend.autonomous.core.autonomous_rag_manager import autonomous_rag_manager + from backend.autonomous.memory.brainstorm_memory import brainstorm_memory + + max_input_tokens = rag_config.get_available_input_tokens( + system_config.compiler_critique_submitter_context_window, + system_config.compiler_critique_submitter_max_tokens + ) + + direct_injected_context = "\n\n".join( + part for part in [ + self.user_prompt or "", + self.paper_title or "", + current_outline or "", + current_body or "", + aggregator_db or "", + critique_feedback or "", + pre_critique_paper or "", + accumulated_history or "", + ] + if part + ) + direct_tokens = count_tokens(direct_injected_context) + + # Reserve headroom for system prompt, JSON schema, rejection memory, + # and the static prompt framing around reference content. + reference_budget = min(16000, max_input_tokens - direct_tokens - 10000) + if reference_budget <= 0: + logger.warning( + "Skipping critique reference context due to prompt budget " + f"(direct={direct_tokens}, max_input={max_input_tokens})" + ) + return None + + exclude_sources = ["compiler_outline.txt", "compiler_paper.txt"] + if self._current_topic_id: + brainstorm_db_path = brainstorm_memory.get_database_path(self._current_topic_id) + exclude_sources.append(Path(brainstorm_db_path).name) + + query = "\n\n".join( + part for part in [ + self.user_prompt or "", + self.paper_title or "", + current_outline or "", + current_body or "", + critique_feedback or "", + pre_critique_paper or "", + ] + if part + ) + + reference_context, _ = await autonomous_rag_manager.get_reference_papers_context( + self._current_reference_paper_ids, + max_total_tokens=reference_budget, + query=query, + exclude_sources=exclude_sources + ) + + return reference_context or None + except Exception as e: + logger.warning(f"Failed to prepare critique reference context: {e}") + return None async 
def _run_critique_aggregation(self) -> None: """ @@ -2307,11 +2550,17 @@ async def _run_critique_aggregation(self) -> None: # Get existing critiques existing_critiques = await critique_memory.get_all_critiques() - # Get reference papers if available - reference_papers = None # TODO: Load if applicable - # Format accumulated critique history from previous failed versions accumulated_history = self._format_accumulated_critique_history() + + # Keep autonomous reference papers available during critique/rewrite. + reference_papers = await self._get_reference_papers_context_for_critique( + current_outline=current_outline, + current_body=current_body, + aggregator_db=aggregator_db, + critique_feedback=existing_critiques, + accumulated_history=accumulated_history + ) # Generate critique submission submission = await self.critique_submitter.submit_critique( @@ -2614,10 +2863,17 @@ async def _trigger_rewrite_decision(self) -> None: # Get context (aggregator DB, reference papers, etc.) from backend.aggregator.memory.shared_training import shared_training_memory aggregator_db = await shared_training_memory.get_all_content() - reference_papers = None # TODO: Load if applicable - # Format accumulated critique history from previous failed versions accumulated_history = self._format_accumulated_critique_history() + + reference_papers = await self._get_reference_papers_context_for_critique( + current_outline=current_outline, + current_body=current_body, + aggregator_db=aggregator_db, + critique_feedback=critique_feedback, + pre_critique_paper=self.pre_critique_paper or "", + accumulated_history=accumulated_history + ) # Critique submitter makes decision logger.info("Critique submitter generating rewrite decision...") @@ -2841,6 +3097,14 @@ async def _execute_partial_revision( # Get current outline current_outline = await outline_memory.get_outline() + + reference_papers = await self._get_reference_papers_context_for_critique( + current_outline=current_outline, + 
current_body=self.pre_critique_paper or "", + critique_feedback=critique_feedback, + pre_critique_paper=self.pre_critique_paper or "", + accumulated_history=accumulated_history or "" + ) # ITERATIVE EDIT LOOP MAX_EDITS = 20 # Safety limit to prevent infinite loops @@ -2867,6 +3131,7 @@ async def _execute_partial_revision( current_outline=current_outline, critique_feedback=critique_feedback, edits_applied=edits_applied, + reference_papers=reference_papers, accumulated_history=accumulated_history ) @@ -3357,7 +3622,7 @@ async def get_status(self) -> CompilerState: review_acceptances=self.review_acceptances, review_rejections=self.review_rejections, review_declines=self.review_declines, - miniscule_edit_count=self.miniscule_edit_count, + minuscule_edit_count=self.minuscule_edit_count, in_critique_phase=self.in_critique_phase, critique_acceptances=self.critique_acceptances, paper_version=self.paper_version, @@ -3431,7 +3696,7 @@ async def clear_paper(self) -> None: self.review_acceptances = 0 self.review_rejections = 0 self.review_declines = 0 - self.miniscule_edit_count = 0 + self.minuscule_edit_count = 0 self.construction_cycle_count = 0 self.rigor_cycle_active = False @@ -3439,6 +3704,7 @@ async def clear_paper(self) -> None: if self.autonomous_mode: self.autonomous_section_phase = "body" # Reset to body phase logger.info("Reset autonomous section phase to body") + self._current_reference_paper_ids = [] # Reset critique phase state self.in_critique_phase = False diff --git a/backend/compiler/core/compiler_rag_manager.py b/backend/compiler/core/compiler_rag_manager.py index 4a29559..c7b2532 100644 --- a/backend/compiler/core/compiler_rag_manager.py +++ b/backend/compiler/core/compiler_rag_manager.py @@ -4,7 +4,7 @@ Default context window: 4096 tokens (user-configurable via settings). 
""" import logging -from typing import Optional +from typing import Optional, List from pathlib import Path from backend.shared.config import system_config, rag_config @@ -229,7 +229,8 @@ async def retrieve_for_mode( self, query: str, mode: str, - max_tokens: Optional[int] = None + max_tokens: Optional[int] = None, + exclude_sources: Optional[List[str]] = None ) -> ContextPack: """ Retrieve context optimized for specific compiler mode. @@ -238,6 +239,7 @@ async def retrieve_for_mode( query: Search query mode: Compiler mode (construction, outline, review, rigor) max_tokens: Override max tokens (defaults to available_tokens) + exclude_sources: Source names to skip (already direct-injected in prompt) Returns: ContextPack with retrieved context @@ -245,6 +247,8 @@ async def retrieve_for_mode( import time logger.info(f"Starting RAG retrieval for mode={mode}, query_length={len(query)}") + if exclude_sources: + logger.info(f"Excluding direct-injected sources: {exclude_sources}") start_time = time.time() try: @@ -257,7 +261,8 @@ async def retrieve_for_mode( context_pack = await rag_manager.retrieve( query=query, chunk_size=chunk_size, - max_tokens=max_tokens + max_tokens=max_tokens, + exclude_sources=exclude_sources ) elapsed = time.time() - start_time diff --git a/backend/compiler/prompts/construction_prompts.py b/backend/compiler/prompts/construction_prompts.py index 2fd0e46..2c279da 100644 --- a/backend/compiler/prompts/construction_prompts.py +++ b/backend/compiler/prompts/construction_prompts.py @@ -89,6 +89,8 @@ def get_body_construction_system_prompt() -> str: 3. Write the NEXT body section that follows the outline 4. Set section_complete=true ONLY when ALL body sections from the outline are written +PROGRESSIVE SYSTEM: You will be called repeatedly — once per body section. Focus on writing ONE complete, rigorous section per turn rather than rushing through multiple sections. 
Write what you can do thoroughly and correctly this turn; you will be called again for the next section. + WHAT COUNTS AS BODY SECTIONS: - Definitions and Preliminaries - Main Results / Theorems @@ -856,6 +858,36 @@ def get_construction_json_schema() -> str: "new_string": "", "reasoning": "The abstract is complete. The paper is finished." } + +OPTIONAL - RETROACTIVE BRAINSTORM OPERATION (Autonomous Mode Only): + +During paper writing, you see the FULL brainstorm database alongside the paper. If you identify +an error, redundancy, or missing insight in the brainstorm, you may OPTIONALLY include a +brainstorm_operation field. This is validated INDEPENDENTLY from your paper operation. + +CRITICAL INDEPENDENT VALIDITY PRINCIPLE: +- Your paper edit must be correct even if the brainstorm operation is rejected +- Your brainstorm operation must be justified even if the paper edit is rejected +- NEVER write paper content that depends on a simultaneous brainstorm correction for correctness +- NEVER propose a brainstorm correction that is only justified by what you're writing in the paper + +Add this OPTIONAL field to your JSON response: +{ + ... (all standard fields above) ..., + "brainstorm_operation": { + "action": "edit | delete | add", + "submission_number": 5, + "new_content": "corrected or new content (empty for delete)", + "reasoning": "Independent justification - must stand alone without referencing paper edit" + } +} + +brainstorm_operation actions: +- "edit": Correct submission #N with new_content (submission_number required) +- "delete": Remove submission #N from brainstorm (submission_number required, new_content empty) +- "add": Add a new insight to the brainstorm (submission_number not needed) + +If no brainstorm correction is needed (most turns), simply omit the brainstorm_operation field. 
""" @@ -872,7 +904,8 @@ async def build_construction_prompt( section_phase: Optional[str] = None, rejection_feedback: Optional[str] = None, critique_feedback: Optional[str] = None, - pre_critique_paper: Optional[str] = None + pre_critique_paper: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """ Build complete prompt for construction mode. @@ -887,6 +920,7 @@ async def build_construction_prompt( rejection_feedback: Feedback from a previous rejection to guide the model critique_feedback: Accepted critique feedback from peer review (for rewrites) pre_critique_paper: Paper state before critique phase (for rewrites - shows what failed) + brainstorm_content: Full brainstorm database with submission numbers (for retroactive corrections, autonomous mode) Returns: Complete prompt string @@ -985,6 +1019,11 @@ async def build_construction_prompt( parts.append("TASK: Write the NEXT logical portion following the section order (body → conclusion → intro → abstract).") parts.append("\n---\n") + + if brainstorm_content: + parts.append(f"BRAINSTORM DATABASE (editable - you may propose corrections via brainstorm_operation):\n{brainstorm_content}") + parts.append("\n---\n") + parts.append(f"AGGREGATOR DATABASE EVIDENCE:\n{rag_evidence}") parts.append("\n---\n") parts.append("Now generate your submission as JSON (remember to set section_complete appropriately):") @@ -1001,7 +1040,8 @@ async def build_phase_construction_prompt( is_first_in_phase: bool = False, rejection_feedback: Optional[str] = None, critique_feedback: Optional[str] = None, - pre_critique_paper: Optional[str] = None + pre_critique_paper: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """ Build prompt for a specific construction phase. 
@@ -1018,6 +1058,7 @@ async def build_phase_construction_prompt( rejection_feedback: Feedback from a previous rejection to guide the model critique_feedback: Accepted critique feedback from peer review (for rewrites) pre_critique_paper: Paper state before critique phase (for rewrites) + brainstorm_content: Full brainstorm database with submission numbers (autonomous mode) Returns: Complete prompt string @@ -1031,7 +1072,8 @@ async def build_phase_construction_prompt( section_phase=phase, rejection_feedback=rejection_feedback, critique_feedback=critique_feedback, - pre_critique_paper=pre_critique_paper + pre_critique_paper=pre_critique_paper, + brainstorm_content=brainstorm_content ) @@ -1047,7 +1089,8 @@ async def build_body_construction_prompt( is_first_portion: bool = False, rejection_feedback: Optional[str] = None, critique_feedback: Optional[str] = None, - pre_critique_paper: Optional[str] = None + pre_critique_paper: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """ Build prompt for BODY section construction phase. 
@@ -1061,6 +1104,7 @@ async def build_body_construction_prompt( rejection_feedback: Feedback from a previous rejection to guide the model critique_feedback: Accepted critique feedback from peer review (for rewrites only) pre_critique_paper: Paper state before critique phase (for rewrites - shows what failed) + brainstorm_content: Full brainstorm database with submission numbers (autonomous mode) """ return await build_phase_construction_prompt( user_prompt=user_prompt, @@ -1071,7 +1115,8 @@ async def build_body_construction_prompt( is_first_in_phase=is_first_portion, rejection_feedback=rejection_feedback, critique_feedback=critique_feedback, - pre_critique_paper=pre_critique_paper + pre_critique_paper=pre_critique_paper, + brainstorm_content=brainstorm_content ) @@ -1080,7 +1125,8 @@ async def build_conclusion_construction_prompt( current_outline: str, current_paper: str, rag_evidence: str, - rejection_feedback: Optional[str] = None + rejection_feedback: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """Build prompt for CONCLUSION section construction phase.""" return await build_phase_construction_prompt( @@ -1090,7 +1136,8 @@ async def build_conclusion_construction_prompt( rag_evidence=rag_evidence, phase="conclusion", is_first_in_phase=True, - rejection_feedback=rejection_feedback + rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content ) @@ -1099,7 +1146,8 @@ async def build_introduction_construction_prompt( current_outline: str, current_paper: str, rag_evidence: str, - rejection_feedback: Optional[str] = None + rejection_feedback: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """Build prompt for INTRODUCTION section construction phase.""" return await build_phase_construction_prompt( @@ -1109,7 +1157,8 @@ async def build_introduction_construction_prompt( rag_evidence=rag_evidence, phase="introduction", is_first_in_phase=True, - rejection_feedback=rejection_feedback + 
rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content ) @@ -1118,7 +1167,8 @@ async def build_abstract_construction_prompt( current_outline: str, current_paper: str, rag_evidence: str, - rejection_feedback: Optional[str] = None + rejection_feedback: Optional[str] = None, + brainstorm_content: Optional[str] = None ) -> str: """Build prompt for ABSTRACT section construction phase.""" return await build_phase_construction_prompt( @@ -1128,5 +1178,6 @@ async def build_abstract_construction_prompt( rag_evidence=rag_evidence, phase="abstract", is_first_in_phase=True, - rejection_feedback=rejection_feedback - ) + rejection_feedback=rejection_feedback, + brainstorm_content=brainstorm_content + ) \ No newline at end of file diff --git a/backend/compiler/prompts/critique_prompts.py b/backend/compiler/prompts/critique_prompts.py index 3e38373..b2ebbf8 100644 --- a/backend/compiler/prompts/critique_prompts.py +++ b/backend/compiler/prompts/critique_prompts.py @@ -53,6 +53,8 @@ def get_critique_submitter_system_prompt() -> str: YOUR TASK: Assess whether the body section needs substantive critique. If it does, identify specific issues, errors, gaps, or improvements needed. If it doesn't (academically acceptable), decline to critique. +PROGRESSIVE SYSTEM: You will be called multiple times (up to 5 total attempts). Focus on identifying ONE specific, well-substantiated critique per turn. Do not try to list every issue at once — address the most important issue thoroughly this turn, and you will have further opportunities to raise additional issues. 
+ WHAT TO CRITIQUE - Focus on: - Mathematical errors or unsound reasoning - Missing proofs or incomplete arguments @@ -872,6 +874,7 @@ def build_iterative_edit_prompt( current_outline: str, critique_feedback: str, edits_applied: List[Dict], + reference_papers: Optional[str] = None, accumulated_critique_history: str = "" ) -> str: """ @@ -884,6 +887,7 @@ def build_iterative_edit_prompt( current_outline: The paper outline critique_feedback: All accepted critiques from this revision cycle edits_applied: List of edits already applied in this iteration + reference_papers: Optional reference paper content accumulated_critique_history: Critiques from previous failed versions (if any) Returns: @@ -915,6 +919,12 @@ def build_iterative_edit_prompt( f"CURRENT PAPER (after {len(edits_applied)} edit(s) applied):\n{current_paper}", "\n---\n", ]) + + if reference_papers: + parts.extend([ + f"REFERENCE PAPERS:\n{reference_papers}", + "\n---\n", + ]) # Show edits already applied if edits_applied: diff --git a/backend/compiler/validation/compiler_validator.py b/backend/compiler/validation/compiler_validator.py index 9715f1a..ac68edf 100644 --- a/backend/compiler/validation/compiler_validator.py +++ b/backend/compiler/validation/compiler_validator.py @@ -122,6 +122,21 @@ def normalize_whitespace(text: str) -> str: return re.sub(r' +', ' ', text) +def normalize_all_whitespace(text: str) -> str: + """ + Collapse all whitespace runs (spaces, newlines, tabs) to a single space. + + Handles the mismatch where models output paragraph breaks as '\\n\\n' but + the document has a single space (or vice versa). Used as a fallback after + the space-only normalization fails. + """ + if not text: + return text + + import re + return re.sub(r'\s+', ' ', text) + + def normalize_backslashes_for_matching(text: str) -> str: """ Collapse runs of 2+ consecutive backslashes to a single backslash for comparison. 
@@ -191,12 +206,31 @@ def find_with_normalized_hyphens(needle: str, haystack: str) -> Tuple[int, str]: logger.debug(f" Whitespace normalization matched: '{needle[:50]}...' found as '{actual_text[:50]}...'") return (match.start(), actual_text) + # Try full whitespace normalization (3b layer - handles newline vs space mismatches) + # Models may output \n\n where the document has a single space, or vice versa. + # Collapses ALL whitespace runs (spaces, newlines, tabs) to a single space. + aws_needle = normalize_all_whitespace(normalized_needle) + aws_haystack = normalize_all_whitespace(normalized_haystack) + + aws_pos = aws_haystack.find(aws_needle) + if aws_pos >= 0: + import re + escaped = re.escape(aws_needle) + flexible_pattern = escaped.replace(r'\ ', r'\s+') + + match = re.search(flexible_pattern, haystack) + if match: + actual_text = match.group(0) + logger.info(f"ALL_WHITESPACE_NORMALIZED_MATCH: Found at pos {match.start()}") + logger.debug(f" All-whitespace normalization matched: '{needle[:50]}...' 
found as '{actual_text[:50]}...'") + return (match.start(), actual_text) + # Try backslash normalization (4th layer - handles model over-escaping quirks) # e.g., model writes \\\\mathbb in JSON -> \\mathbb after json.loads, but document has \mathbb - bs_needle = normalize_backslashes_for_matching(ws_needle) - bs_haystack = normalize_backslashes_for_matching(ws_haystack) + bs_needle = normalize_backslashes_for_matching(aws_needle) + bs_haystack = normalize_backslashes_for_matching(aws_haystack) - if bs_needle != ws_needle: # Only attempt if backslash normalization actually changed the needle + if bs_needle != aws_needle: # Only attempt if backslash normalization actually changed the needle bs_pos = bs_haystack.find(bs_needle) if bs_pos >= 0: # Convert normalized needle to a regex that allows 1+ backslashes wherever @@ -228,8 +262,9 @@ def find_with_normalized_hyphens(needle: str, haystack: str) -> Tuple[int, str]: # === DEEP DIAGNOSTICS FOR COMPLETE FAILURE === logger.warning(f"MATCH_FAILED_COMPLETELY - Deep diagnostic analysis:") - logger.warning(f" Needle (first 200 chars): {repr(needle[:200])}") - logger.warning(f" Needle (last 200 chars): {repr(needle[-200:])}") + logger.warning(f" Needle FULL:\n{needle}") + logger.warning(f" Needle (first 200 chars repr): {repr(needle[:200])}") + logger.warning(f" Needle (last 200 chars repr): {repr(needle[-200:])}") logger.warning(f" Haystack (first 200 chars): {repr(haystack[:200])}") logger.warning(f" Haystack (last 200 chars): {repr(haystack[-200:])}") @@ -526,6 +561,7 @@ def _pre_validate_exact_string_match( logger.info(f"PRE_VALIDATE_START: mode={submission.mode}, operation={submission.operation}") if submission.old_string: logger.info(f" old_string preview: {repr(submission.old_string[:100])}{'...' 
if len(submission.old_string) > 100 else ''}") + logger.debug(f" old_string full: {repr(submission.old_string)}") logger.debug(f" old_string diagnostics: {_diagnostic_char_info(submission.old_string)}") # Determine which document to check against based on mode @@ -636,6 +672,7 @@ def _pre_validate_exact_string_match( if outline_confusion: # Provide targeted feedback for outline vs paper confusion logger.warning(f"Pre-validation failed: old_string found in OUTLINE but not in PAPER (outline confusion)") + logger.warning(f"FULL old_string that failed to match paper:\n{submission.old_string}") return CompilerValidationResult( submission_id=submission.submission_id, decision="reject", @@ -665,6 +702,7 @@ def _pre_validate_exact_string_match( fix_suggestion = f"\n\nNo similar text found. Verify the old_string matches something in the current {document_name} exactly." logger.warning(f"Pre-validation failed: old_string not found in {document_name}") + logger.warning(f"FULL old_string that failed to match:\n{submission.old_string}") return CompilerValidationResult( submission_id=submission.submission_id, decision="reject", @@ -692,6 +730,7 @@ def _pre_validate_exact_string_match( match_count = normalized_doc.count(normalized_old) if match_count > 1: logger.warning(f"Pre-validation failed: old_string appears {match_count} times in {document_name} (not unique)") + logger.warning(f"FULL old_string that matched multiple times:\n{submission.old_string}") return CompilerValidationResult( submission_id=submission.submission_id, decision="reject", @@ -1020,6 +1059,183 @@ async def validate_submission( validation_stage="internal_error" ) + async def validate_brainstorm_operation( + self, + brainstorm_op: "BrainstormRetroactiveOperation", + brainstorm_content: str + ) -> CompilerValidationResult: + """ + Validate a retroactive brainstorm operation independently. + + The validator sees ONLY the brainstorm database and the proposed operation. 
+ It never sees the paper operation that may accompany this brainstorm operation. + Each operation must be justified on its own merits. + """ + from backend.shared.models import BrainstormRetroactiveOperation + logger.info(f"Validating brainstorm retroactive operation: {brainstorm_op.action}") + + prompt = self._build_brainstorm_validation_prompt(brainstorm_op, brainstorm_content) + + actual_prompt_tokens = count_tokens(prompt) + from backend.shared.config import system_config, rag_config + max_allowed_tokens = rag_config.get_available_input_tokens( + system_config.compiler_validator_context_window, + system_config.compiler_validator_max_output_tokens + ) + + if actual_prompt_tokens > max_allowed_tokens: + logger.error(f"Brainstorm validation prompt too large: {actual_prompt_tokens} > {max_allowed_tokens}") + return CompilerValidationResult( + submission_id=str(uuid.uuid4()), + decision="reject", + reasoning=f"Internal error: Brainstorm validation prompt too large ({actual_prompt_tokens} tokens)", + summary="Internal context overflow error", + json_valid=False, + validation_stage="internal_error" + ) + + task_id = self.get_current_task_id() + self.task_sequence += 1 + + if self.task_tracking_callback: + self.task_tracking_callback("started", task_id) + + try: + response = await api_client_manager.generate_completion( + task_id=task_id, + role_id=self.role_id, + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + temperature=0.0, + max_tokens=system_config.compiler_validator_max_output_tokens + ) + + message = response["choices"][0]["message"] + llm_output = message.get("content") or message.get("reasoning") or "" + + validation_data = await self._parse_json_with_retry(llm_output, prompt, "", 0) + + decision = validation_data.get("decision", "reject") + reasoning = validation_data.get("reasoning", "No reasoning provided") + + result = CompilerValidationResult( + submission_id=str(uuid.uuid4()), + decision=decision, + reasoning=reasoning, + 
summary=reasoning[:750], + json_valid=True, + validation_stage="llm_validation" + ) + + if self.task_tracking_callback: + self.task_tracking_callback("completed", task_id) + + logger.info(f"Brainstorm operation validation: {decision}") + return result + + except FreeModelExhaustedError: + raise + except Exception as e: + logger.error(f"Brainstorm operation validation failed: {e}") + if self.task_tracking_callback: + self.task_tracking_callback("completed", task_id) + return CompilerValidationResult( + submission_id=str(uuid.uuid4()), + decision="reject", + reasoning=f"Validation error: {str(e)}", + summary=f"Validation error: {str(e)}"[:750], + json_valid=False, + validation_stage="internal_error" + ) + + def _build_brainstorm_validation_prompt( + self, + brainstorm_op: "BrainstormRetroactiveOperation", + brainstorm_content: str + ) -> str: + """Build prompt for brainstorm retroactive operation validation.""" + action = brainstorm_op.action + + system_prompt = f"""You are validating a retroactive correction to a brainstorm knowledge database. This correction was proposed during paper compilation by a submitter who identified an issue in the source material. + +You see ONLY the brainstorm database and the proposed operation. You do NOT see the paper or any paper edits. Your decision must be based solely on whether this operation improves the brainstorm database. + +OPERATION TYPE: {action.upper()} + +""" + if action == "delete": + system_prompt += """VALIDATION CRITERIA (DELETE): +A brainstorm submission should be REMOVED if it: +1. Contains mathematical errors or logically unsound reasoning +2. Is redundant with other submissions (content fully covered elsewhere) +3. Contradicts established mathematical principles evident in other submissions +4. Was marginally useful but provides no unique value given the current database state + +KEEP the submission if: +1. It provides ANY unique information not covered elsewhere +2. 
There is ANY doubt about whether it's truly harmful or redundant +3. It offers a different perspective even if related to other content + +CONSERVATIVE DEFAULT: When in doubt, reject the removal (keep the submission). +""" + elif action == "edit": + system_prompt += """VALIDATION CRITERIA (EDIT): +A brainstorm submission edit should be ACCEPTED if: +1. The corrected version fixes a genuine mathematical error +2. The corrected version is more accurate than the original +3. The correction improves the submission's value to the knowledge pool +4. The correction is mathematically sound and well-justified + +REJECT the edit if: +1. The original was not actually wrong +2. The edit introduces new errors or reduces quality +3. The reasoning for correction is weak or unconvincing +4. The edit is a stylistic preference rather than a substantive correction + +CONSERVATIVE DEFAULT: When in doubt, reject the edit (keep the original). +""" + elif action == "add": + system_prompt += """VALIDATION CRITERIA (ADD): +A new brainstorm submission should be ACCEPTED if: +1. It adds genuinely new mathematical insight not already in the database +2. It connects existing concepts in novel ways +3. It provides concrete methods, theorems, proofs, or techniques +4. It is grounded in established mathematical principles + +REJECT the addition if: +1. It is redundant with existing submissions +2. It contains trivial or commonly known information already present +3. It contains unsupported claims or logical fallacies +4. 
It is too vague or generic to be actionable +""" + + system_prompt += """ +Output your decision ONLY as JSON: +{ + "decision": "accept" or "reject", + "reasoning": "Detailed explanation of your decision" +} +""" + + parts = [system_prompt, "\n---\n"] + parts.append(f"BRAINSTORM DATABASE:\n{brainstorm_content}") + parts.append("\n---\n") + + if action == "delete": + parts.append(f"PROPOSED REMOVAL: Submission #{brainstorm_op.submission_number}") + parts.append(f"\nREASONING: {brainstorm_op.reasoning}") + elif action == "edit": + parts.append(f"PROPOSED EDIT: Submission #{brainstorm_op.submission_number}") + parts.append(f"\nNEW CONTENT:\n{brainstorm_op.new_content}") + parts.append(f"\nREASONING: {brainstorm_op.reasoning}") + elif action == "add": + parts.append(f"PROPOSED NEW SUBMISSION:\n{brainstorm_op.new_content}") + parts.append(f"\nREASONING: {brainstorm_op.reasoning}") + + parts.append("\n---\nNow validate this brainstorm operation as JSON:") + + return "\n".join(parts) + def _strip_placeholder_text(self, text: str) -> str: """ Strip any placeholder markers from text. 
diff --git a/backend/shared/api_client_manager.py b/backend/shared/api_client_manager.py index baba4a7..018eef4 100644 --- a/backend/shared/api_client_manager.py +++ b/backend/shared/api_client_manager.py @@ -22,8 +22,10 @@ ) from backend.shared.boost_manager import boost_manager from backend.shared.boost_logger import boost_logger +from backend.shared.config import rag_config from backend.shared.free_model_manager import free_model_manager from backend.shared.models import ModelConfig +from backend.shared.token_tracker import token_tracker logger = logging.getLogger(__name__) @@ -62,6 +64,9 @@ def __init__(self): # Current autonomous phase (set by autonomous coordinator) self._current_autonomous_phase: str = "unknown" + # Track roles that have already broadcast fallback_failed (prevent GUI log spam) + self._fallback_failed_notified: set = set() + # Lock for thread-safe state updates self._state_lock = asyncio.Lock() @@ -74,6 +79,43 @@ async def _broadcast(self, event: str, data: Dict[str, Any] = None) -> None: if self._broadcast_callback: await self._broadcast_callback(event, data or {}) + async def _with_hung_connection_watchdog( + self, + coro, + role_id: str, + model: str, + provider: str, + timeout_seconds: int = 900 + ): + """Wrap an API call coroutine with a watchdog that alerts after timeout_seconds (default 15 min).""" + async def _watchdog(): + await asyncio.sleep(timeout_seconds) + minutes = timeout_seconds // 60 + logger.warning( + f"API call for role '{role_id}' using {model} via {provider} " + f"has been running for {minutes}+ minutes — possible hung connection" + ) + await self._broadcast("hung_connection_alert", { + "role_id": role_id, + "model": model, + "provider": provider, + "elapsed_minutes": minutes, + "message": ( + f"API call to {model} via {provider} has been running for {minutes}+ minutes. " + f"The connection may be hung. Consider stopping and trying a different host/provider." 
+ ) + }) + + watchdog_task = asyncio.create_task(_watchdog()) + try: + return await coro + finally: + watchdog_task.cancel() + try: + await watchdog_task + except asyncio.CancelledError: + pass + def set_model_tracking_callback(self, callback: Optional[Callable]) -> None: """ Set callback for model usage tracking during Tier 3 final answer generation. @@ -248,17 +290,29 @@ async def generate_completion( start_time = time.time() try: + boost_api_key = ( + boost_manager.boost_config.openrouter_api_key or + rag_config.openrouter_api_key + ) + if not boost_api_key: + raise RuntimeError("Boost requested but no OpenRouter API key is available") + # Create temporary client with boost API key - boost_client = OpenRouterClient(boost_manager.boost_config.openrouter_api_key) + boost_client = OpenRouterClient(boost_api_key) boost_provider = boost_manager.boost_config.boost_provider try: - result = await boost_client.generate_completion( + result = await self._with_hung_connection_watchdog( + boost_client.generate_completion( + model=boost_model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens or boost_manager.boost_config.boost_max_output_tokens, + response_format=response_format, + provider=boost_provider + ), + role_id=role_id, model=boost_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens or boost_manager.boost_config.boost_max_output_tokens, - response_format=response_format, - provider=boost_provider + provider=boost_provider or "OpenRouter" ) # Calculate duration @@ -272,6 +326,11 @@ async def generate_completion( response_content = message.get("content") or message.get("reasoning") or "" if result.get("usage"): tokens_used = result["usage"].get("total_tokens") + _pt = result["usage"].get("prompt_tokens") + _ct = result["usage"].get("completion_tokens") + if _pt is not None and _ct is not None: + token_tracker.track(boost_model, _pt, _ct) + await self._broadcast("token_usage_updated", token_tracker.get_stats()) # Log 
the boost call await boost_logger.log_boost_call( @@ -554,13 +613,18 @@ async def generate_completion( try: logger.debug(f"Role {role_id} using OpenRouter: {openrouter_model}{provider_info}") - result = await self._openrouter_client.generate_completion( + result = await self._with_hung_connection_watchdog( + self._openrouter_client.generate_completion( + model=openrouter_model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens or role_config.max_output_tokens, + response_format=response_format, + provider=openrouter_provider + ), + role_id=role_id, model=openrouter_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens or role_config.max_output_tokens, - response_format=response_format, - provider=openrouter_provider # Pass specific provider if configured + provider=openrouter_provider or "OpenRouter" ) # Calculate duration and extract response @@ -572,6 +636,11 @@ async def generate_completion( response_content = message.get("content") or message.get("reasoning") or "" if result.get("usage"): tokens_used = result["usage"].get("total_tokens") + _pt = result["usage"].get("prompt_tokens") + _ct = result["usage"].get("completion_tokens") + if _pt is not None and _ct is not None: + token_tracker.track(openrouter_model, _pt, _ct) + await self._broadcast("token_usage_updated", token_tracker.get_stats()) # Log to autonomous API logger if callback set if self._autonomous_logger_callback: @@ -752,11 +821,13 @@ async def generate_completion( f"fallback model in settings." 
) logger.error(error_msg) - await self._broadcast("openrouter_fallback_failed", { - "role_id": role_id, - "reason": "no_fallback_configured", - "message": error_msg - }) + if role_id not in self._fallback_failed_notified: + self._fallback_failed_notified.add(role_id) + await self._broadcast("openrouter_fallback_failed", { + "role_id": role_id, + "reason": "no_fallback_configured", + "message": error_msg + }) raise RuntimeError(error_msg) # Fallback IS configured - use it @@ -822,13 +893,18 @@ async def generate_completion( start_time = time.time() try: - result = await lm_studio_client.generate_completion( + result = await self._with_hung_connection_watchdog( + lm_studio_client.generate_completion( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + **kwargs + ), + role_id=role_id, model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - response_format=response_format, - **kwargs + provider="LM Studio" ) # Calculate duration and extract response @@ -840,6 +916,11 @@ async def generate_completion( response_content = message.get("content") or message.get("reasoning") or "" if result.get("usage"): tokens_used = result["usage"].get("total_tokens") + _pt = result["usage"].get("prompt_tokens") + _ct = result["usage"].get("completion_tokens") + if _pt is not None and _ct is not None: + token_tracker.track(model, _pt, _ct) + await self._broadcast("token_usage_updated", token_tracker.get_stats()) # Log to autonomous API logger if callback set if self._autonomous_logger_callback: @@ -921,14 +1002,25 @@ async def _try_free_model_rotation( "reason": "rate_limit", }) try: - result = await self._openrouter_client.generate_completion( + result = await self._with_hung_connection_watchdog( + self._openrouter_client.generate_completion( + model=alt_model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + ), + 
role_id=role_id, model=alt_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - response_format=response_format, + provider="OpenRouter (free rotation)" ) await self._track_model_usage(alt_model) + if result.get("usage"): + _pt = result["usage"].get("prompt_tokens") + _ct = result["usage"].get("completion_tokens") + if _pt is not None and _ct is not None: + token_tracker.track(alt_model, _pt, _ct) + await self._broadcast("token_usage_updated", token_tracker.get_stats()) if free_model_manager.is_account_exhausted(): free_model_manager.clear_account_exhaustion() return result @@ -948,14 +1040,25 @@ async def _try_free_model_rotation( "original_model": original_model, }) try: - result = await self._openrouter_client.generate_completion( + result = await self._with_hung_connection_watchdog( + self._openrouter_client.generate_completion( + model=auto_model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + ), + role_id=role_id, model=auto_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - response_format=response_format, + provider="OpenRouter (auto-selector)" ) await self._track_model_usage(auto_model) + if result.get("usage"): + _pt = result["usage"].get("prompt_tokens") + _ct = result["usage"].get("completion_tokens") + if _pt is not None and _ct is not None: + token_tracker.track(auto_model, _pt, _ct) + await self._broadcast("token_usage_updated", token_tracker.get_stats()) if free_model_manager.is_account_exhausted(): free_model_manager.clear_account_exhaustion() return result @@ -985,6 +1088,31 @@ def get_all_fallback_states(self) -> Dict[str, str]: """ return self._role_fallback_state.copy() + async def reset_openrouter_fallbacks(self) -> Dict[str, str]: + """ + Reset all roles that were originally configured for OpenRouter back to 'openrouter' state. + Called when user adds credits and wants to retry OpenRouter without restarting. 
+ + Returns: + Dict of role_id -> new_state for roles that were reset + """ + reset_roles = {} + async with self._state_lock: + for role_id, config in self._role_model_configs.items(): + if config.provider == "openrouter" and self._role_fallback_state.get(role_id) == "lm_studio": + self._role_fallback_state[role_id] = "openrouter" + reset_roles[role_id] = "openrouter" + logger.info(f"Reset role '{role_id}' back to OpenRouter (was fallen back to LM Studio)") + + if reset_roles: + self._fallback_failed_notified.difference_update(reset_roles.keys()) + await self._broadcast("openrouter_fallbacks_reset", { + "reset_roles": list(reset_roles.keys()), + "message": f"Reset {len(reset_roles)} role(s) back to OpenRouter" + }) + + return reset_roles + async def get_embeddings(self, texts: List[str], model: str = None) -> List[List[float]]: """ Get embeddings, routing to LM Studio first, then OpenRouter fallback. diff --git a/backend/shared/boost_manager.py b/backend/shared/boost_manager.py index f79438a..4417933 100644 --- a/backend/shared/boost_manager.py +++ b/backend/shared/boost_manager.py @@ -35,6 +35,8 @@ "comp_hp": "High-Param Submitter", "comp_val": "Compiler Validator", # Autonomous + "auto_te": "Topic Explorer", + "auto_tev": "Topic Explorer Validator", "auto_ts": "Topic Selector", "auto_tv": "Topic Validator", "auto_cr": "Completion Reviewer", @@ -340,6 +342,8 @@ def get_available_categories(self, mode: str = "all") -> List[Dict[str, str]]: if mode in ("autonomous", "all"): categories.extend([ + {"id": "auto_te", "label": "Topic Explore", "group": "Autonomous"}, + {"id": "auto_tev", "label": "Topic Explore Val", "group": "Autonomous"}, {"id": "auto_ts", "label": "Topic Sel", "group": "Autonomous"}, {"id": "auto_tv", "label": "Topic Val", "group": "Autonomous"}, {"id": "auto_cr", "label": "Completion", "group": "Autonomous"}, diff --git a/backend/shared/config.py b/backend/shared/config.py index 441ceb5..c07c4e4 100644 --- a/backend/shared/config.py +++ 
b/backend/shared/config.py @@ -34,6 +34,7 @@ class RAGConfig(BaseSettings): # Memory limits max_documents: int = 10000 # For RAG document cache; user files never evicted; high for infinite runtime + max_chunks_per_size: int = 10000 # Per-size chunk cap; oldest non-permanent trimmed when exceeded max_shared_training_insights: int = 999999 # Effectively unlimited for infinite runtime max_local_rejections: int = 5 # Per rules: "last 5 rejections" @@ -57,8 +58,8 @@ class RAGConfig(BaseSettings): embedding_model: str = "text-embedding-nomic-embed-text-v1.5" # OpenRouter API (Global Configuration) - # This is the global API key used for per-role OpenRouter model selection - # Separate from boost API key which is stored in BoostConfig + # This is the default OpenRouter API key used for per-role model selection. + # API Boost can also reuse it unless the boost modal supplies an override key. openrouter_api_key: Optional[str] = None openrouter_enabled: bool = False # True when API key is set and validated @@ -151,7 +152,6 @@ class SystemConfig(BaseSettings): autonomous_completion_review_interval: int = 10 # Every 10 acceptances autonomous_paper_redundancy_interval: int = 3 # Every 3 completed papers autonomous_max_reference_papers: int = 6 # Max papers for reference context - autonomous_topic_selection_retry_limit: int = 3 # Wolfram Alpha integration (optional) wolfram_alpha_enabled: bool = False diff --git a/backend/shared/critique_prompts.py b/backend/shared/critique_prompts.py index af09e9c..fd5a7e8 100644 --- a/backend/shared/critique_prompts.py +++ b/backend/shared/critique_prompts.py @@ -2,8 +2,14 @@ Paper Critique Prompts Module. Contains the default critique prompt and helper functions for building -critique requests to the validator model. +critique requests to the validator model. Also provides lenient parsing +for critique responses that may be truncated by max_tokens limits. 
""" +import json +import re +import logging + +logger = logging.getLogger(__name__) # Default critique prompt that can be customized by users DEFAULT_CRITIQUE_PROMPT = """You are an expert academic reviewer providing an honest, thorough critique of a research paper. @@ -85,3 +91,151 @@ def get_default_critique_prompt() -> str: """ return DEFAULT_CRITIQUE_PROMPT + +def parse_critique_response(response_content: str) -> dict: + """ + Parse a critique LLM response with lenient fallback for truncated JSON. + + Critique responses are especially prone to truncation because reasoning models + burn tokens on internal thinking before the JSON, and the full_critique field + (the last and longest field) often gets cut off right before the closing '}'. + + Strategy: + 1. Try strict parse_json() first + 2. If truncated, try repairing by appending closing characters + 3. If still fails, extract ratings and feedback via regex + + Returns: + Parsed critique dict with all expected fields + """ + from backend.shared.json_parser import parse_json, sanitize_json_response + + # Step 1: Try strict parsing + try: + return parse_json(response_content) + except Exception as strict_err: + logger.info(f"Strict critique parse failed ({strict_err}), attempting truncation repair") + + # Step 2: Try repairing truncated JSON + # Common case: model wrote all content but ran out of tokens before closing '}' + try: + sanitized = sanitize_json_response(response_content) + except (ValueError, Exception): + # sanitize_json_response raises ValueError on truncation - that's expected + # Fall through to repair attempts using raw content + sanitized = _strip_to_json(response_content) + + repaired = _try_repair_json(sanitized) + if repaired is not None: + logger.info("Critique JSON repaired after truncation - recovered all fields") + return repaired + + # Step 3: Regex extraction fallback + logger.warning("Critique JSON repair failed, falling back to regex extraction") + return 
_regex_extract_critique(response_content) + + +def _strip_to_json(raw: str) -> str: + """Strip thinking tokens, markdown, and prefixes to get to the JSON content.""" + content = raw.strip() + + # Strip ... + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE).strip() + content = re.sub(r'', '', content, flags=re.IGNORECASE).strip() + content = re.sub(r'', '', content, flags=re.IGNORECASE).strip() + + # Strip markdown code blocks + if content.startswith('```'): + lines = content.split('\n') + if len(lines) > 2: + closing = -1 + for i in range(1, len(lines)): + if lines[i].strip() == '```': + closing = i + break + if closing > 0: + content = '\n'.join(lines[1:closing]).strip() + + # Strip prefix before first '{' + brace = content.find('{') + if brace > 0: + content = content[brace:] + + return content + + +def _try_repair_json(content: str): + """ + Attempt to repair truncated critique JSON by appending missing closing characters. + Only repairs simple truncation (missing '}', or string cut off mid-value). 
+ """ + if not content or '{' not in content: + return None + + # Try progressively more aggressive repairs + repairs = [ + '}', # Missing only closing brace + '"}', # String value ended, missing quote + brace + '..."}', # Truncated mid-word in last string value + ] + + for suffix in repairs: + candidate = content.rstrip() + suffix + try: + result = json.loads(candidate) + if isinstance(result, dict) and result.get("novelty_rating"): + return result + except (json.JSONDecodeError, ValueError): + continue + + # More aggressive: find last complete key-value pair and close from there + # Handles case where truncation happened mid-field-value + last_complete = content.rfind('","') + if last_complete > 0: + truncated = content[:last_complete + 1] + '}' + try: + result = json.loads(truncated) + if isinstance(result, dict) and result.get("novelty_rating"): + return result + except (json.JSONDecodeError, ValueError): + pass + + return None + + +def _regex_extract_critique(raw: str) -> dict: + """ + Last-resort extraction of critique fields from raw text via regex. + Ratings appear early in the JSON and are almost always present even in + heavily truncated responses. 
+ """ + def extract_rating(field: str) -> int: + m = re.search(rf'"{field}"\s*:\s*(\d+)', raw) + if m: + val = int(m.group(1)) + return val if 1 <= val <= 10 else 0 + return 0 + + def extract_string(field: str) -> str: + m = re.search(rf'"{field}"\s*:\s*"((?:[^"\\]|\\.)*)"', raw, re.DOTALL) + return m.group(1) if m else "" + + novelty = extract_rating("novelty_rating") + correctness = extract_rating("correctness_rating") + impact = extract_rating("impact_rating") + + result = { + "novelty_rating": novelty, + "novelty_feedback": extract_string("novelty_feedback") or ("Unable to parse structured response" if novelty == 0 else ""), + "correctness_rating": correctness, + "correctness_feedback": extract_string("correctness_feedback") or ("Unable to parse structured response" if correctness == 0 else ""), + "impact_rating": impact, + "impact_feedback": extract_string("impact_feedback") or ("Unable to parse structured response" if impact == 0 else ""), + "full_critique": extract_string("full_critique") or raw, + } + + recovered = sum(1 for k in ["novelty_rating", "correctness_rating", "impact_rating"] if result[k] > 0) + logger.info(f"Regex extraction recovered {recovered}/3 ratings: N={novelty}, C={correctness}, I={impact}") + + return result + diff --git a/backend/shared/models.py b/backend/shared/models.py index a617630..830e483 100644 --- a/backend/shared/models.py +++ b/backend/shared/models.py @@ -184,6 +184,10 @@ class CompilerSubmission(BaseModel): For outline_create mode, uses full_content operation where content is the complete outline. For other modes, content stores the submission for logging while old_string/new_string specify the edit. + + Retroactive brainstorm operations (optional, autonomous mode only): + - brainstorm_operation: Optional operation on the source brainstorm database. + Validated independently from paper operations. Each must stand on its own merits. 
""" submission_id: str mode: Literal["outline_create", "outline_update", "construction", "review", "rigor"] @@ -201,10 +205,31 @@ class CompilerSubmission(BaseModel): needs_edit: Optional[bool] = None # For review mode: False = no edit needed needs_enhancement: Optional[bool] = None # For rigor mode: False = no enhancement needed needs_update: Optional[bool] = None # For outline_update mode: False = no update needed + + # Retroactive brainstorm correction (optional, autonomous paper writing only) + brainstorm_operation: Optional["BrainstormRetroactiveOperation"] = None + timestamp: datetime = Field(default_factory=datetime.now) metadata: Dict[str, Any] = Field(default_factory=dict) +class BrainstormRetroactiveOperation(BaseModel): + """Optional retroactive operation on the source brainstorm database. + + Proposed by the compiler submitter during paper writing and validated + independently from the paper operation. The validator sees ONLY the + brainstorm context when validating this, never the paper operation. + Each operation must be independently justified. 
+ """ + action: Literal["edit", "delete", "add"] + submission_number: Optional[int] = None # Required for edit/delete, None for add + new_content: str = "" # Required for edit/add, empty for delete + reasoning: str # Independent justification (must not depend on paper operation) + + +CompilerSubmission.model_rebuild() + + class CompilerValidationResult(BaseModel): """Result of validation by compiler validator.""" submission_id: str @@ -238,7 +263,7 @@ class CompilerState(BaseModel): review_acceptances: int = 0 review_rejections: int = 0 review_declines: int = 0 - miniscule_edit_count: int = 0 + minuscule_edit_count: int = 0 in_critique_phase: bool = False critique_acceptances: int = 0 paper_version: int = 1 @@ -326,9 +351,16 @@ class TopicValidationResult(BaseModel): """Result of topic validation.""" decision: Literal["accept", "reject"] reasoning: str + summary: str = "" # Rejection feedback (max 750 chars) timestamp: datetime = Field(default_factory=datetime.now) +class BrainstormContinuationDecision(BaseModel): + """Decision on whether to write another paper from the same brainstorm or move on.""" + decision: Literal["write_another_paper", "move_on"] + reasoning: str + + class CompletionReviewResult(BaseModel): """Result of brainstorm completion review.""" decision: Literal["continue_brainstorm", "write_paper"] diff --git a/backend/shared/token_tracker.py b/backend/shared/token_tracker.py new file mode 100644 index 0000000..4505d9f --- /dev/null +++ b/backend/shared/token_tracker.py @@ -0,0 +1,85 @@ +""" +Token Tracker - Tracks cumulative input/output token usage across the session, +with per-model breakdown and a research timer. +""" +import logging +import time +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + + +class TokenTracker: + """ + Singleton that accumulates prompt_tokens and completion_tokens + from every successful LLM completion call, broken down by model. + Also provides a simple elapsed-time research timer. 
+ """ + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialized = False + return cls._instance + + def __init__(self): + if self._initialized: + return + self._initialized = True + self._total_input = 0 + self._total_output = 0 + self._by_model: Dict[str, Dict[str, int]] = {} + self._start_time: Optional[float] = None + self._stopped_elapsed: float = 0.0 + logger.info("TokenTracker initialized") + + def track(self, model_id: str, prompt_tokens: int, completion_tokens: int) -> None: + """Record token usage for one successful API call.""" + self._total_input += prompt_tokens + self._total_output += completion_tokens + if model_id not in self._by_model: + self._by_model[model_id] = {"input": 0, "output": 0} + self._by_model[model_id]["input"] += prompt_tokens + self._by_model[model_id]["output"] += completion_tokens + + def start_timer(self) -> None: + """Start (or resume) the research timer.""" + if self._start_time is None: + self._start_time = time.time() + logger.info("TokenTracker timer started") + + def stop_timer(self) -> None: + """Pause the timer, preserving elapsed time so it can be resumed.""" + if self._start_time is not None: + self._stopped_elapsed += time.time() - self._start_time + self._start_time = None + logger.info(f"TokenTracker timer stopped (elapsed: {self._stopped_elapsed:.1f}s)") + + def get_elapsed_seconds(self) -> float: + """Return total elapsed seconds (running + previously stopped segments).""" + elapsed = self._stopped_elapsed + if self._start_time is not None: + elapsed += time.time() - self._start_time + return elapsed + + def get_stats(self) -> Dict[str, Any]: + """Return current cumulative stats for the frontend.""" + return { + "total_input": self._total_input, + "total_output": self._total_output, + "by_model": dict(self._by_model), + "elapsed_seconds": round(self.get_elapsed_seconds(), 1), + } + + def reset(self) -> None: + """Clear all counters and 
timer for a new session.""" + self._total_input = 0 + self._total_output = 0 + self._by_model.clear() + self._start_time = None + self._stopped_elapsed = 0.0 + logger.info("TokenTracker reset") + + +token_tracker = TokenTracker() diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 65a00b4..f90b2df 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,12 +1,12 @@ { "name": "asi-aggregator-frontend", - "version": "1.0.4", + "version": "1.0.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "asi-aggregator-frontend", - "version": "1.0.4", + "version": "1.0.5", "license": "MIT", "dependencies": { "dompurify": "^3.2.4", @@ -1163,9 +1163,9 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", "engines": { @@ -1362,9 +1362,9 @@ } }, "node_modules/vite": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.0.tgz", - "integrity": "sha512-dZwN5L1VlUBewiP6H9s2+B3e3Jg96D0vzN+Ry73sOefebhYr9f94wwkMNN/9ouoU8pV1BqA1d1zGk8928cx0rg==", + "version": "7.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", + "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "dev": true, "license": "MIT", "dependencies": { diff --git a/frontend/package.json b/frontend/package.json index 2268980..62fb80b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "asi-aggregator-frontend", - "version": "1.0.4", + "version": "1.0.5", "description": "Frontend 
UI for MOTO S.T.E.M. Mathematics Variant - Autonomous ASI Research System for Novel S.T.E.M. Mathematical Paper Generation", "author": "Intrafere LLC", "license": "MIT", diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 2a63048..9d32b8c 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState, useEffect, useRef, useCallback } from 'react'; import AggregatorInterface from './components/aggregator/AggregatorInterface'; import AggregatorSettings from './components/aggregator/AggregatorSettings'; import AggregatorLogs from './components/aggregator/AggregatorLogs'; @@ -11,6 +11,7 @@ import { AutonomousResearchInterface, BrainstormList, PaperLibrary, + Stage2PaperHistory, AutonomousResearchSettings, AutonomousResearchLogs, FinalAnswerView, @@ -19,23 +20,79 @@ import { import WorkflowPanel from './components/WorkflowPanel'; import BoostControlModal from './components/BoostControlModal'; import BoostLogs from './components/BoostLogs'; +import StartupProviderSetupModal from './components/StartupProviderSetupModal'; import OpenRouterApiKeyModal from './components/OpenRouterApiKeyModal'; import OpenRouterPrivacyWarningModal from './components/OpenRouterPrivacyWarningModal'; import CritiqueNotificationStack from './components/CritiqueNotificationStack'; +import CreditExhaustionNotificationStack from './components/CreditExhaustionNotificationStack'; +import HungConnectionNotificationStack from './components/HungConnectionNotificationStack'; import PaperCritiqueModal from './components/PaperCritiqueModal'; import { websocket } from './services/websocket'; import { api, autonomousAPI, openRouterAPI } from './services/api'; +import { + LM_STUDIO_STARTUP_CHOICE, + RECOMMENDED_PROFILE_KEY, + STARTUP_PROVIDER_CHOICE_STORAGE_KEY, + applyAutonomousProfileSelection, + applyLmStudioStartupDefaults, + getStoredAutonomousSettings, + settingsToAutonomousConfig, + 
persistAutonomousSettings, +} from './utils/autonomousProfiles'; + +const APP_MODE_STORAGE_KEY = 'appMode'; +const AUTONOMOUS_TAB_STORAGE_KEY = 'autonomousActiveTab'; +const MANUAL_TAB_STORAGE_KEY = 'manualActiveTab'; +const LEGACY_SINGLE_PAPER_WRITER_STORAGE_KEY = 'singlePaperWriterExpanded'; +const EMBEDDING_MODEL_HINTS = ['embed', 'embedding', 'nomic', 'bge', 'e5', 'gte']; + +function normalizeLoadedLmStudioModelId(modelId = '') { + return String(modelId).replace(/:\d+$/, ''); +} + +function isLikelyEmbeddingModel(modelId = '') { + const normalizedModelId = normalizeLoadedLmStudioModelId(modelId).toLowerCase(); + return EMBEDDING_MODEL_HINTS.some((hint) => normalizedModelId.includes(hint)); +} + +function getUsableLoadedLmStudioChatModelId(loadedModels = []) { + for (const loadedModelId of loadedModels) { + const normalizedModelId = normalizeLoadedLmStudioModelId(loadedModelId); + if (!normalizedModelId || isLikelyEmbeddingModel(normalizedModelId)) { + continue; + } + return normalizedModelId; + } + + return ''; +} function App() { - const [activeTab, setActiveTab] = useState('auto-interface'); - - // Single Paper Writer expandable section state - const [showSinglePaperWriter, setShowSinglePaperWriter] = useState(() => { - const saved = localStorage.getItem('singlePaperWriterExpanded'); - return saved ? JSON.parse(saved) : false; - }); + const [appMode, setAppMode] = useState(() => { + const savedMode = localStorage.getItem(APP_MODE_STORAGE_KEY); + if (savedMode === 'autonomous' || savedMode === 'manual') { + return savedMode; + } + + const legacyExpanded = localStorage.getItem(LEGACY_SINGLE_PAPER_WRITER_STORAGE_KEY); + if (!legacyExpanded) { + return 'autonomous'; + } - const [singlePaperWriterActiveTab, setSinglePaperWriterActiveTab] = useState('aggregator-interface'); + try { + return JSON.parse(legacyExpanded) ? 
'manual' : 'autonomous'; + } catch { + return 'autonomous'; + } + }); + const [autonomousActiveTab, setAutonomousActiveTab] = useState( + () => localStorage.getItem(AUTONOMOUS_TAB_STORAGE_KEY) || 'auto-interface' + ); + const [manualActiveTab, setManualActiveTab] = useState( + () => localStorage.getItem(MANUAL_TAB_STORAGE_KEY) || 'aggregator-interface' + ); + const [utilityActiveTab, setUtilityActiveTab] = useState(null); + const activeTab = utilityActiveTab || (appMode === 'manual' ? manualActiveTab : autonomousActiveTab); // Models list (fetched from API) const [models, setModels] = useState([]); @@ -49,6 +106,15 @@ function App() { // LM Studio availability state (for determining default provider) const [lmStudioAvailable, setLmStudioAvailable] = useState(true); + const [lmStudioStatus, setLmStudioStatus] = useState({ + available: true, + has_models: false, + model_count: 0, + models: [], + error: null, + usable_chat_model_id: '', + has_usable_chat_model: false, + }); const [hasOpenRouterKey, setHasOpenRouterKey] = useState(false); // Track if any workflow is running (for WorkflowPanel visibility) @@ -59,6 +125,22 @@ function App() { const savedState = localStorage.getItem('workflow_panel_collapsed'); return savedState === 'true'; }); + + useEffect(() => { + localStorage.setItem(APP_MODE_STORAGE_KEY, appMode); + localStorage.setItem( + LEGACY_SINGLE_PAPER_WRITER_STORAGE_KEY, + JSON.stringify(appMode === 'manual') + ); + }, [appMode]); + + useEffect(() => { + localStorage.setItem(AUTONOMOUS_TAB_STORAGE_KEY, autonomousActiveTab); + }, [autonomousActiveTab]); + + useEffect(() => { + localStorage.setItem(MANUAL_TAB_STORAGE_KEY, manualActiveTab); + }, [manualActiveTab]); // Initialize config from localStorage or use defaults // CRITICAL: Read from 'aggregator_settings' (used by AggregatorSettings component) @@ -157,6 +239,9 @@ function App() { // Disclaimer modal state (shows on every app load) const [showDisclaimer, setShowDisclaimer] = useState(true); + const 
[showStartupSetupModal, setShowStartupSetupModal] = useState(false); + const [startupSetupMessage, setStartupSetupMessage] = useState(''); + const [checkingLmStudioStartupChoice, setCheckingLmStudioStartupChoice] = useState(false); // OpenRouter privacy warning modal state const [showPrivacyWarning, setShowPrivacyWarning] = useState(false); @@ -170,96 +255,40 @@ function App() { const [selectedCritiquePaper, setSelectedCritiquePaper] = useState(null); const [showCritiqueModal, setShowCritiqueModal] = useState(false); + // Credit exhaustion notification state (persistent until dismissed) + const [creditExhaustionNotifications, setCreditExhaustionNotifications] = useState([]); + + // Hung connection notification state (persistent until dismissed) + const [hungConnectionNotifications, setHungConnectionNotifications] = useState([]); + + // Live refs used by websocket listeners (which are registered once) + const autonomousRunningRef = useRef(autonomousRunning); + const autonomousTierRef = useRef(autonomousStatus?.current_tier || null); + const openRouterKeyJustSavedRef = useRef(false); + + useEffect(() => { + autonomousRunningRef.current = autonomousRunning; + }, [autonomousRunning]); + + useEffect(() => { + autonomousTierRef.current = autonomousStatus?.current_tier || null; + }, [autonomousStatus]); + // Autonomous config with localStorage persistence // CRITICAL: Read from 'autonomous_research_settings' (used by AutonomousResearchSettings component) const [autonomousConfig, setAutonomousConfig] = useState(() => { - // Try to load from the settings component key first - const settingsConfig = localStorage.getItem('autonomous_research_settings'); - if (settingsConfig) { - try { - const settings = JSON.parse(settingsConfig); - const localConfig = settings.localConfig || {}; - return { - submitter_configs: settings.submitterConfigs || [ - { submitterId: 1, provider: 'lm_studio', modelId: '', openrouterProvider: null, lmStudioFallbackId: null, contextWindow: 131072, 
maxOutputTokens: 25000 }, - { submitterId: 2, provider: 'lm_studio', modelId: '', openrouterProvider: null, lmStudioFallbackId: null, contextWindow: 131072, maxOutputTokens: 25000 }, - { submitterId: 3, provider: 'lm_studio', modelId: '', openrouterProvider: null, lmStudioFallbackId: null, contextWindow: 131072, maxOutputTokens: 25000 } - ], - validator_provider: localConfig.validator_provider, - validator_model: localConfig.validator_model, - validator_openrouter_provider: localConfig.validator_openrouter_provider, - validator_lm_studio_fallback: localConfig.validator_lm_studio_fallback, - validator_context_window: localConfig.validator_context_window, - validator_max_tokens: localConfig.validator_max_tokens, - high_context_provider: localConfig.high_context_provider, - high_context_model: localConfig.high_context_model, - high_context_openrouter_provider: localConfig.high_context_openrouter_provider, - high_context_lm_studio_fallback: localConfig.high_context_lm_studio_fallback, - high_context_context_window: localConfig.high_context_context_window, - high_context_max_tokens: localConfig.high_context_max_tokens, - high_param_provider: localConfig.high_param_provider, - high_param_model: localConfig.high_param_model, - high_param_openrouter_provider: localConfig.high_param_openrouter_provider, - high_param_lm_studio_fallback: localConfig.high_param_lm_studio_fallback, - high_param_context_window: localConfig.high_param_context_window, - high_param_max_tokens: localConfig.high_param_max_tokens, - critique_submitter_provider: localConfig.critique_submitter_provider, - critique_submitter_model: localConfig.critique_submitter_model, - critique_submitter_openrouter_provider: localConfig.critique_submitter_openrouter_provider, - critique_submitter_lm_studio_fallback: localConfig.critique_submitter_lm_studio_fallback, - critique_submitter_context_window: localConfig.critique_submitter_context_window, - critique_submitter_max_tokens: 
localConfig.critique_submitter_max_tokens, - tier3_enabled: settings.tier3Enabled ?? false - }; - } catch (e) { - console.error('Failed to parse autonomous_research_settings:', e); - } - } - - // Final fallback - use ACTUAL working defaults (OpenRouter API IDs) - return { - submitter_configs: [ - { submitterId: 1, provider: 'openrouter', modelId: 'openai/gpt-oss-120b', openrouterProvider: 'Google', lmStudioFallbackId: null, contextWindow: 131072, maxOutputTokens: 25000 }, - { submitterId: 2, provider: 'openrouter', modelId: 'openai/gpt-oss-20b', openrouterProvider: 'Groq', lmStudioFallbackId: null, contextWindow: 131072, maxOutputTokens: 25000 }, - { submitterId: 3, provider: 'openrouter', modelId: 'openai/gpt-oss-120b', openrouterProvider: 'Google', lmStudioFallbackId: null, contextWindow: 131072, maxOutputTokens: 25000 } - ], - validator_provider: 'openrouter', - validator_model: 'openai/gpt-oss-120b', - validator_openrouter_provider: 'Google', - validator_lm_studio_fallback: null, - validator_context_window: 131072, - validator_max_tokens: 25000, - high_context_provider: 'openrouter', - high_context_model: 'openai/gpt-oss-120b', - high_context_openrouter_provider: 'Google', - high_context_lm_studio_fallback: null, - high_context_context_window: 131072, - high_context_max_tokens: 25000, - high_param_provider: 'openrouter', - high_param_model: 'openai/gpt-oss-120b', - high_param_openrouter_provider: 'Google', - high_param_lm_studio_fallback: null, - high_param_context_window: 131072, - high_param_max_tokens: 25000, - critique_submitter_provider: 'openrouter', - critique_submitter_model: 'openai/gpt-oss-120b', - critique_submitter_openrouter_provider: 'Google', - critique_submitter_lm_studio_fallback: null, - critique_submitter_context_window: 131072, - critique_submitter_max_tokens: 25000, - tier3_enabled: false - }; + return settingsToAutonomousConfig(getStoredAutonomousSettings()); }); // Save autonomous config to localStorage - // CRITICAL: Save to BOTH keys to 
maintain backward compatibility useEffect(() => { - localStorage.setItem('autonomousConfig', JSON.stringify(autonomousConfig)); - // Also save to autonomous_research_settings in the format expected by AutonomousResearchSettings - const settingsToSave = { - numSubmitters: autonomousConfig.submitter_configs?.length || 3, - submitterConfigs: autonomousConfig.submitter_configs || [], + const existingSettings = getStoredAutonomousSettings(); + persistAutonomousSettings({ + ...existingSettings, + numSubmitters: autonomousConfig.submitter_configs?.length || existingSettings.numSubmitters || 3, + submitterConfigs: autonomousConfig.submitter_configs || existingSettings.submitterConfigs, localConfig: { + ...existingSettings.localConfig, validator_provider: autonomousConfig.validator_provider, validator_model: autonomousConfig.validator_model, validator_openrouter_provider: autonomousConfig.validator_openrouter_provider, @@ -283,67 +312,137 @@ function App() { critique_submitter_openrouter_provider: autonomousConfig.critique_submitter_openrouter_provider, critique_submitter_lm_studio_fallback: autonomousConfig.critique_submitter_lm_studio_fallback, critique_submitter_context_window: autonomousConfig.critique_submitter_context_window, - critique_submitter_max_tokens: autonomousConfig.critique_submitter_max_tokens + critique_submitter_max_tokens: autonomousConfig.critique_submitter_max_tokens, }, - freeOnly: false, // Default value - tier3Enabled: autonomousConfig.tier3_enabled ?? false - }; - localStorage.setItem('autonomous_research_settings', JSON.stringify(settingsToSave)); + tier3Enabled: autonomousConfig.tier3_enabled ?? existingSettings.tier3Enabled ?? 
false, + }); }, [autonomousConfig]); - // Check LM Studio availability and fetch models on mount + const syncProviderAvailability = useCallback(async () => { + let lmResult = { + available: false, + has_models: false, + model_count: 0, + models: [], + error: null, + }; + + try { + lmResult = await openRouterAPI.checkLMStudioAvailability(); + } catch (err) { + console.error('Failed to check LM Studio availability:', err); + lmResult = { + available: false, + has_models: false, + model_count: 0, + models: [], + error: err.message || 'Failed to check LM Studio availability.', + }; + } + + const usableLmStudioChatModelId = getUsableLoadedLmStudioChatModelId(lmResult.models || []); + const hasUsableLmStudioChatModel = Boolean(usableLmStudioChatModelId); + const lmAvailable = Boolean(lmResult.available && lmResult.has_models); + setLmStudioStatus({ + ...lmResult, + usable_chat_model_id: usableLmStudioChatModelId, + has_usable_chat_model: hasUsableLmStudioChatModel, + }); + setLmStudioAvailable(lmAvailable); + + let keyStatus = { has_key: false }; + try { + keyStatus = await openRouterAPI.getApiKeyStatus(); + } catch (err) { + console.error('Failed to check OpenRouter key status:', err); + } + + let finalHasOpenRouterKey = Boolean(keyStatus.has_key); + const storedKey = localStorage.getItem('openrouter_api_key'); + if (storedKey && !finalHasOpenRouterKey) { + try { + await openRouterAPI.setApiKey(storedKey); + finalHasOpenRouterKey = true; + } catch (err) { + console.error('Failed to restore OpenRouter key:', err); + localStorage.removeItem('openrouter_api_key'); + } + } + setHasOpenRouterKey(finalHasOpenRouterKey); + + let availableModels = []; + if (lmAvailable) { + try { + const data = await api.getModels(); + availableModels = data.models || data || []; + setModels(availableModels); + } catch (err) { + console.error('Failed to fetch LM Studio models:', err); + setModels([]); + } + } else { + setModels([]); + } + + return { + lmAvailable, + hasOpenRouterKey: 
finalHasOpenRouterKey, + hasUsableLmStudioChatModel, + lmStudioStatus: { + ...lmResult, + usable_chat_model_id: usableLmStudioChatModelId, + has_usable_chat_model: hasUsableLmStudioChatModel, + }, + defaultLmStudioModelId: usableLmStudioChatModelId, + }; + }, []); + + useEffect(() => { + syncProviderAvailability(); + }, [syncProviderAvailability]); + useEffect(() => { - const checkAvailability = async () => { + const restoreWolframKey = async () => { + const storedWolframKey = localStorage.getItem('wolfram_alpha_api_key'); + if (!storedWolframKey) { + return; + } + + try { + await api.setWolframApiKey(storedWolframKey); + } catch (err) { + console.error('Failed to restore Wolfram Alpha key:', err); + localStorage.removeItem('wolfram_alpha_api_key'); + } + }; + + restoreWolframKey(); + }, []); + + // Periodically re-check OpenRouter key status to keep indicator in sync + useEffect(() => { + const interval = setInterval(async () => { try { - // Check LM Studio availability - const lmResult = await openRouterAPI.checkLMStudioAvailability(); - const lmAvailable = lmResult.available && lmResult.has_models; - setLmStudioAvailable(lmAvailable); - - // Check if OpenRouter API key is configured const keyStatus = await openRouterAPI.getApiKeyStatus(); setHasOpenRouterKey(keyStatus.has_key); - - // Also check localStorage for saved key and sync with backend - const storedKey = localStorage.getItem('openrouter_api_key'); - if (storedKey && !keyStatus.has_key) { - // Restore key to backend from localStorage - try { - await openRouterAPI.setApiKey(storedKey); - setHasOpenRouterKey(true); - } catch (err) { - console.error('Failed to restore OpenRouter key:', err); - localStorage.removeItem('openrouter_api_key'); + + if (!keyStatus.has_key) { + const storedKey = localStorage.getItem('openrouter_api_key'); + if (storedKey) { + try { + await openRouterAPI.setApiKey(storedKey); + setHasOpenRouterKey(true); + } catch { + // Silent retry next interval + } } } - - // If LM Studio not 
available and no OpenRouter key, prompt for key - if (!lmAvailable && !keyStatus.has_key && !storedKey) { - console.log('LM Studio not available, prompting for OpenRouter API key...'); - setOpenRouterKeyReason('lm_studio_unavailable'); - setShowOpenRouterKeyModal(true); - } - - // Fetch LM Studio models if available - if (lmAvailable) { - api.getModels().then(data => { - setModels(data.models || data); - }).catch(err => { - console.error('Failed to fetch LM Studio models:', err); - }); - } - } catch (err) { - console.error('Failed to check availability:', err); - // Fallback to fetching models directly - api.getModels().then(data => { - setModels(data.models || data); - }).catch(modelErr => { - console.error('Failed to fetch models:', modelErr); - }); + } catch { + // Backend unreachable, skip this cycle } - }; - - checkAvailability(); + }, 30000); + + return () => clearInterval(interval); }, []); // Check autonomous research status on mount (handles page refresh while running) @@ -399,6 +498,86 @@ function App() { const addActivity = (event) => { setAutonomousActivity(prev => [...prev, event].slice(-MAX_ACTIVITY_EVENTS)); }; + const isAutonomousTier2Active = () => + autonomousRunningRef.current && autonomousTierRef.current === 'tier2_paper_writing'; + const formatCompilerMode = (mode) => { + switch (mode) { + case 'outline_create': + return 'Outline creation'; + case 'construction': + return 'Construction'; + case 'outline_update': + return 'Outline update'; + case 'review': + return 'Review'; + case 'rigor': + return 'Rigor'; + default: + return mode || 'Compiler'; + } + }; + const formatReason = (reasoning, maxLen = 140) => { + if (!reasoning) return ''; + const cleaned = String(reasoning).replace(/\s+/g, ' ').trim(); + if (!cleaned) return ''; + return cleaned.length > maxLen ? 
`${cleaned.slice(0, maxLen)}...` : cleaned; + }; + + // Topic exploration events (pre-brainstorm candidate collection) + unsubscribers.push(websocket.on('topic_exploration_started', (data) => { + addActivity({ + event: 'topic_exploration_started', + timestamp: new Date().toISOString(), + message: `Topic exploration started (target: ${data.target || 5} candidates${data.resumed_count ? `, resuming with ${data.resumed_count}` : ''})`, + data + }); + })); + + unsubscribers.push(websocket.on('topic_exploration_progress', (data) => { + addActivity({ + event: 'topic_exploration_progress', + timestamp: new Date().toISOString(), + message: `Exploration candidate ${data.accepted}/${data.target} accepted: ${data.latest_question ? data.latest_question.substring(0, 100) + '...' : ''}`, + data + }); + })); + + unsubscribers.push(websocket.on('topic_exploration_complete', (data) => { + addActivity({ + event: 'topic_exploration_complete', + timestamp: new Date().toISOString(), + message: `Topic exploration complete: ${data.accepted_count} candidates collected from ${data.total_attempts} attempts`, + data + }); + })); + + // Paper title exploration events (pre-title-selection candidate collection) + unsubscribers.push(websocket.on('paper_title_exploration_started', (data) => { + addActivity({ + event: 'paper_title_exploration_started', + timestamp: new Date().toISOString(), + message: `Title exploration started (target: ${data.target || 5} candidate titles)`, + data + }); + })); + + unsubscribers.push(websocket.on('paper_title_exploration_progress', (data) => { + addActivity({ + event: 'paper_title_exploration_progress', + timestamp: new Date().toISOString(), + message: `Title candidate ${data.accepted}/${data.target} accepted`, + data + }); + })); + + unsubscribers.push(websocket.on('paper_title_exploration_complete', (data) => { + addActivity({ + event: 'paper_title_exploration_complete', + timestamp: new Date().toISOString(), + message: `Title exploration complete: 
${data.accepted_count} candidates collected from ${data.total_attempts} attempts`, + data + }); + })); // Topic selection events unsubscribers.push(websocket.on('topic_selected', (data) => { @@ -470,6 +649,7 @@ function App() { // Paper events unsubscribers.push(websocket.on('paper_writing_started', (data) => { + autonomousTierRef.current = 'tier2_paper_writing'; addActivity({ event: 'paper_writing_started', timestamp: new Date().toISOString(), @@ -477,6 +657,54 @@ function App() { data }); })); + + // Compiler writing activity events (Tier 2 paper writing internals) + unsubscribers.push(websocket.on('compiler_acceptance', (data) => { + if (!isAutonomousTier2Active()) return; + const modeLabel = formatCompilerMode(data.mode); + const iterationSuffix = data.iteration ? ` (iteration ${data.iteration})` : ''; + addActivity({ + event: 'compiler_acceptance', + timestamp: new Date().toISOString(), + message: `${modeLabel}: ✓ ACCEPTED${iterationSuffix}`, + data + }); + })); + + unsubscribers.push(websocket.on('compiler_rejection', (data) => { + if (!isAutonomousTier2Active()) return; + const modeLabel = formatCompilerMode(data.mode); + const iterationSuffix = data.iteration ? ` (iteration ${data.iteration})` : ''; + const reason = formatReason(data.reasoning); + addActivity({ + event: 'compiler_rejection', + timestamp: new Date().toISOString(), + message: `${modeLabel}: ✗ REJECTED${iterationSuffix}${reason ? ` - ${reason}` : ''}`, + data + }); + })); + + unsubscribers.push(websocket.on('compiler_decline', (data) => { + if (!isAutonomousTier2Active()) return; + const modeLabel = formatCompilerMode(data.mode); + const reason = formatReason(data.reasoning, 100); + addActivity({ + event: 'compiler_decline', + timestamp: new Date().toISOString(), + message: `${modeLabel}: ↷ DECLINED${reason ? 
` - ${reason}` : ''}`, + data + }); + })); + + unsubscribers.push(websocket.on('outline_locked', (data) => { + if (!isAutonomousTier2Active()) return; + addActivity({ + event: 'outline_locked', + timestamp: new Date().toISOString(), + message: `Outline locked after ${data.total_iterations || data.iteration || '?'} iteration(s)`, + data + }); + })); // Critique phase events (paper writing substages) unsubscribers.push(websocket.on('critique_phase_started', (data) => { @@ -578,6 +806,9 @@ function App() { // Handle resume after crash/restart - sync running state console.log('Autonomous research resumed:', data); setAutonomousRunning(true); + if (data?.tier) { + autonomousTierRef.current = data.tier; + } addActivity({ event: 'auto_research_resumed', timestamp: new Date().toISOString(), @@ -592,10 +823,13 @@ function App() { unsubscribers.push(websocket.on('auto_research_stopped', () => { setAutonomousRunning(false); + autonomousTierRef.current = null; + setHungConnectionNotifications([]); })); // Tier 3 events unsubscribers.push(websocket.on('tier3_started', (data) => { + autonomousTierRef.current = 'tier3_final_answer'; addActivity({ event: 'tier3_started', timestamp: new Date().toISOString(), @@ -701,6 +935,7 @@ function App() { // Paper writing resumed (after crash recovery) unsubscribers.push(websocket.on('paper_writing_resumed', (data) => { + autonomousTierRef.current = 'tier2_paper_writing'; addActivity({ event: 'paper_writing_resumed', timestamp: new Date().toISOString(), @@ -847,6 +1082,111 @@ function App() { message: `❌ Account free credits depleted: ${data.message}`, ...data }); + setCreditExhaustionNotifications(prev => { + const roleId = data.role_id || 'Account'; + if (prev.some(n => n.role_id === roleId && n.reason === 'account_credits_exhausted')) return prev; + return [...prev, { + id: `account_exhausted_${Date.now()}`, + role_id: roleId, + reason: 'account_credits_exhausted', + message: data.message || 'Account free credits depleted.', + timestamp: 
new Date().toISOString() + }]; + }); + })); + + // OpenRouter fallback event (credit exhaustion triggered fallback to LM Studio) + unsubscribers.push(websocket.on('openrouter_fallback', (data) => { + console.warn('OpenRouter fallback triggered:', data); + addActivity({ + event: 'openrouter_fallback', + timestamp: new Date().toISOString(), + message: `⚠️ OpenRouter credits exhausted for ${data.role_id} — fell back to ${data.fallback_model || 'LM Studio'}`, + ...data + }); + setCreditExhaustionNotifications(prev => { + const reason = data.reason || 'credit_exhaustion'; + if (prev.some(n => n.role_id === data.role_id && n.reason === reason)) return prev; + return [...prev, { + id: `fallback_${data.role_id}_${Date.now()}`, + role_id: data.role_id, + reason, + message: data.message, + fallback_model: data.fallback_model, + timestamp: new Date().toISOString() + }]; + }); + })); + + // OpenRouter fallback failed (no fallback configured — role stopped) + unsubscribers.push(websocket.on('openrouter_fallback_failed', (data) => { + console.error('OpenRouter fallback failed:', data); + addActivity({ + event: 'openrouter_fallback_failed', + timestamp: new Date().toISOString(), + message: `🛑 OpenRouter credits exhausted for ${data.role_id} — NO FALLBACK configured!`, + ...data + }); + setCreditExhaustionNotifications(prev => { + if (prev.some(n => n.role_id === data.role_id && n.reason === 'no_fallback_configured')) return prev; + return [...prev, { + id: `fallback_failed_${data.role_id}_${Date.now()}`, + role_id: data.role_id, + reason: 'no_fallback_configured', + message: data.message, + timestamp: new Date().toISOString() + }]; + }); + })); + + // Boost credits exhausted + unsubscribers.push(websocket.on('boost_credits_exhausted', (data) => { + console.warn('Boost credits exhausted:', data); + addActivity({ + event: 'boost_credits_exhausted', + timestamp: new Date().toISOString(), + message: `⚠️ Boost credits exhausted for task ${data.task_id}`, + ...data + }); + 
setCreditExhaustionNotifications(prev => { + if (prev.some(n => n.reason === 'boost_credits_exhausted')) return prev; + return [...prev, { + id: `boost_exhausted_${Date.now()}`, + role_id: `Boost (${data.task_id || 'unknown'})`, + reason: 'boost_credits_exhausted', + message: data.message || 'Boost API credits exhausted. Falling back to primary model.', + timestamp: new Date().toISOString() + }]; + }); + })); + + unsubscribers.push(websocket.on('openrouter_fallbacks_reset', (data) => { + console.info('OpenRouter fallbacks reset:', data); + addActivity({ + event: 'openrouter_fallbacks_reset', + timestamp: new Date().toISOString(), + message: `OpenRouter reset: ${data.message}`, + ...data + }); + setCreditExhaustionNotifications([]); + setHungConnectionNotifications([]); + })); + + unsubscribers.push(websocket.on('hung_connection_alert', (data) => { + console.warn('Hung connection alert:', data); + addLog({ + type: 'warning', + message: `⏳ Possible hung connection: ${data.model} via ${data.provider} (${data.elapsed_minutes}+ min)`, + ...data + }); + setHungConnectionNotifications(prev => { + if (prev.some(n => n.role_id === data.role_id)) return prev; + return [...prev, { + id: `hung_${data.role_id}_${Date.now()}`, + ...data, + timestamp: Date.now() + }]; + }); })); unsubscribers.push(websocket.on('final_answer_complete', (data) => { @@ -1001,7 +1341,7 @@ function App() { setAutonomousRunning(true); setAutonomousActivity([]); } catch (error) { - alert(`Failed to start autonomous research: ${error.message}`); + alert(`Failed to start autonomous research: ${error.details || error.message}`); } }; @@ -1061,7 +1401,7 @@ function App() { // Determine Final Answer tab label based on Tier 3 status const getFinalAnswerLabel = () => { if (autonomousStatus?.is_tier3_active) { - return 'Stage 3:FINAL ANSWER IN PROGRESS'; + return 'Autonomous Stage 3: FINAL ANSWER IN PROGRESS'; } if (autonomousStatus?.tier3_status === 'complete') { return 'Stage 3: FINAL ANSWER COMPLETE ✓'; @@ 
-1083,6 +1423,41 @@ function App() { setShowCritiqueModal(false); setSelectedCritiquePaper(null); }; + + const handleModeChange = (nextMode) => { + setAppMode(nextMode); + setUtilityActiveTab(null); + }; + + const handleAutonomousTabSelect = (tabId) => { + setAutonomousActiveTab(tabId); + setUtilityActiveTab(null); + if (appMode !== 'autonomous') { + setAppMode('autonomous'); + } + }; + + const handleManualTabSelect = (tabId) => { + setManualActiveTab(tabId); + setUtilityActiveTab(null); + if (appMode !== 'manual') { + setAppMode('manual'); + } + }; + + const handleUtilityTabSelect = (tabId) => { + setUtilityActiveTab(tabId); + }; + + // Credit exhaustion notification handler + const handleDismissCreditNotification = (notificationId) => { + setCreditExhaustionNotifications(prev => prev.filter(n => n.id !== notificationId)); + }; + + // Hung connection notification handler + const handleDismissHungNotification = (notificationId) => { + setHungConnectionNotifications(prev => prev.filter(n => n.id !== notificationId)); + }; // Critique modal API functions const handleGenerateCritique = async (customPrompt, validatorConfig) => { @@ -1103,35 +1478,126 @@ function App() { return response; }; + const handleDisclaimerAcknowledge = async () => { + setShowDisclaimer(false); + setStartupSetupMessage(''); + + const { + lmAvailable, + hasOpenRouterKey: keyPresent, + hasUsableLmStudioChatModel, + } = await syncProviderAvailability(); + if (keyPresent) { + return; + } + + const startupChoice = localStorage.getItem(STARTUP_PROVIDER_CHOICE_STORAGE_KEY); + if (startupChoice === LM_STUDIO_STARTUP_CHOICE && lmAvailable && hasUsableLmStudioChatModel) { + return; + } + + if (startupChoice === LM_STUDIO_STARTUP_CHOICE && (!lmAvailable || !hasUsableLmStudioChatModel)) { + setStartupSetupMessage( + 'LM Studio was previously selected, but it is not fully ready. Start LM Studio, load nomic-ai/nomic-embed-text-v1.5 and at least one usable local chat model, then try again.' 
+ ); + } + + setShowStartupSetupModal(true); + }; + + const handleStartupOpenRouterChoice = () => { + setStartupSetupMessage(''); + setShowStartupSetupModal(false); + setOpenRouterKeyReason('startup_setup'); + setShowOpenRouterKeyModal(true); + }; + + const handleCloseOpenRouterKeyModal = () => { + const keyWasJustSaved = openRouterKeyJustSavedRef.current; + const shouldReturnToStartup = openRouterKeyReason === 'startup_setup' && !keyWasJustSaved && !hasOpenRouterKey; + openRouterKeyJustSavedRef.current = false; + setShowOpenRouterKeyModal(false); + + if (shouldReturnToStartup) { + setShowStartupSetupModal(true); + } + }; + + const handleStartupLmStudioChoice = async () => { + setCheckingLmStudioStartupChoice(true); + setStartupSetupMessage(''); + + try { + const { lmAvailable, hasUsableLmStudioChatModel, defaultLmStudioModelId } = await syncProviderAvailability(); + + if (!lmAvailable) { + setStartupSetupMessage( + 'LM Studio is not detected with a loaded model yet. Install LM Studio, start the local server, load nomic-ai/nomic-embed-text-v1.5, and then try again.' + ); + return; + } + + if (!hasUsableLmStudioChatModel || !defaultLmStudioModelId) { + setStartupSetupMessage( + 'LM Studio is running, but no usable chat model is currently loaded. Load at least one local chat model in addition to nomic-ai/nomic-embed-text-v1.5, then try again.' 
+ ); + return; + } + + const { config: nextAutonomousConfig } = applyLmStudioStartupDefaults(defaultLmStudioModelId); + setAutonomousConfig(nextAutonomousConfig); + localStorage.setItem(STARTUP_PROVIDER_CHOICE_STORAGE_KEY, LM_STUDIO_STARTUP_CHOICE); + setShowStartupSetupModal(false); + } finally { + setCheckingLmStudioStartupChoice(false); + } + }; + + const handleOpenRouterKeySet = async () => { + if (openRouterKeyReason === 'startup_setup') { + const { config: nextAutonomousConfig } = await applyAutonomousProfileSelection(RECOMMENDED_PROFILE_KEY); + setAutonomousConfig(nextAutonomousConfig); + setShowStartupSetupModal(false); + setStartupSetupMessage(''); + } + + openRouterKeyJustSavedRef.current = true; + setHasOpenRouterKey(true); + console.log('OpenRouter API key set successfully'); + }; + const mainTabs = [ { id: 'auto-interface', label: 'Start Here: Autonomous Deep Research Controller', group: 'autonomous-main' }, - { id: 'auto-brainstorms', label: 'Stage 1: Brainstorms', group: 'autonomous-main' }, - { id: 'auto-papers', label: 'Stage 2: Short-Form Final Answer(s)', subtext: '(Less Hallucinatory - Short-Form Final Answers)', subtextClass: 'green', group: 'autonomous-main' }, + { id: 'auto-brainstorms', label: 'Autonomous Stage 1: Brainstorms', group: 'autonomous-main' }, + { id: 'auto-papers', label: 'Autonomous Stage 2: Papers', subtext: '(Less Hallucinatory - Recommended Output)', subtextClass: 'green', group: 'autonomous-main' }, ...(autonomousConfig.tier3_enabled ? 
[ { id: 'auto-final-answer', label: getFinalAnswerLabel(), subtext: '(Very Experimental and Hallucinatory)', group: 'autonomous-main' }, ] : []), ]; const autonomousSettingsTabs = [ - { id: 'auto-final-answer-library', label: 'Long-Form Final Answer History', subtext: '(Very Experimental and Hallucinatory)', group: 'autonomous-settings' }, + { id: 'auto-stage2-history', label: 'Stage 2 Final Answers History', group: 'autonomous-settings' }, + { id: 'auto-final-answer-library', label: 'Stage 3 Final Answers History', subtext: '(Very Experimental and Hallucinatory)', group: 'autonomous-settings' }, { id: 'auto-logs', label: 'API Call Logs', group: 'autonomous-settings' }, { id: 'auto-settings', label: 'Autonomous Model Selection & Settings', group: 'autonomous-settings' }, ]; - const singlePaperWriterTabs = { - aggregator: [ - { id: 'aggregator-interface', label: 'Interface' }, - { id: 'aggregator-settings', label: 'Settings' }, - { id: 'aggregator-logs', label: 'Logs' }, - { id: 'aggregator-results', label: 'Live Results' }, - ], - compiler: [ - { id: 'compiler-interface', label: 'Interface' }, - { id: 'compiler-settings', label: 'Settings' }, - { id: 'compiler-logs', label: 'Logs' }, - { id: 'compiler-live-paper', label: 'Live Paper' }, - ] - }; + const manualTabs = [ + { id: 'aggregator-interface', label: 'Aggregator', subtext: 'Part 1', subtextClass: 'green', group: 'aggregator' }, + { id: 'aggregator-settings', label: 'Aggregator Settings', group: 'aggregator' }, + { id: 'aggregator-logs', label: 'Aggregator Logs', group: 'aggregator' }, + { id: 'aggregator-results', label: 'Live Results', subtext: 'Part 1 Live Results', subtextClass: 'green', group: 'aggregator' }, + { id: 'compiler-interface', label: 'Compiler', subtext: 'Part 2', subtextClass: 'green', group: 'compiler' }, + { id: 'compiler-settings', label: 'Compiler Settings', group: 'compiler' }, + { id: 'compiler-logs', label: 'Compiler Logs', group: 'compiler' }, + { id: 'compiler-live-paper', label: 
'Live Paper', subtext: 'Part 2 Live Results', subtextClass: 'green', group: 'compiler' }, + ]; + + useEffect(() => { + if (!autonomousConfig.tier3_enabled && autonomousActiveTab === 'auto-final-answer') { + setAutonomousActiveTab('auto-interface'); + } + }, [autonomousConfig.tier3_enabled, autonomousActiveTab]); // Sync with WorkflowPanel collapse state (stored in localStorage) useEffect(() => { @@ -1177,10 +1643,10 @@ function App() {

M.O.T.O. - Deep Research Harness + Autonomous ASI

-

A Prototype Super Intelligence - Creative Math Researcher Variant for S.T.E.M. (High Risk, High Reward Outputs)

By Intrafere Research Group

+

A Prototype Artificial Superintelligence - Novelty Seeking Autonomous S.T.E.M. Researcher For Automated Theorem Generation

@@ -1189,6 +1655,20 @@ function App() { {/* They are visible at program launch and stay visible forever */} {/* Slide with WorkflowPanel collapse/expand animation */}
+
+ + +
-
- {mainTabs.map((tab, index) => { - const prevTab = mainTabs[index - 1]; - const showSeparator = prevTab && prevTab.group !== tab.group; - - // Special styling for Final Answer tab - const isFinalAnswerTab = tab.id === 'auto-final-answer'; - const tier3Classes = isFinalAnswerTab - ? (autonomousStatus?.tier3_status === 'complete' - ? 'tab-tier3-complete' - : (autonomousStatus?.is_tier3_active ? 'tab-tier3-active' : '')) - : ''; - - return ( - - {showSeparator &&
} - - - ); - })} - - {/* Large spacer for settings group */} -
- - {autonomousSettingsTabs.map(tab => { - return ( - - - - ); - })} -
- - {/* Expandable Single Paper Writer Section */} -
- - - {showSinglePaperWriter && ( -
-
-
AGGREGATOR
-
- {singlePaperWriterTabs.aggregator.map(tab => ( +
+ {appMode === 'autonomous' ? ( + <> + {mainTabs.map((tab, index) => { + const prevTab = mainTabs[index - 1]; + const showSeparator = prevTab && prevTab.group !== tab.group; + + // Special styling for Final Answer tab + const isFinalAnswerTab = tab.id === 'auto-final-answer'; + const tier3Classes = isFinalAnswerTab + ? (autonomousStatus?.tier3_status === 'complete' + ? 'tab-tier3-complete' + : (autonomousStatus?.is_tier3_active ? 'tab-tier3-active' : '')) + : ''; + + return ( + + {showSeparator &&
} - ))} -
-
+ + ); + })} + + {/* Large spacer for settings group */} +
-
-
COMPILER
-
- {singlePaperWriterTabs.compiler.map(tab => ( + {autonomousSettingsTabs.map(tab => { + return ( + - ))} -
-
-
+ + ); + })} + + ) : ( + <> + {manualTabs.map((tab, index) => { + const prevTab = manualTabs[index - 1]; + const showSeparator = prevTab && prevTab.group !== tab.group; + + return ( + + {showSeparator &&
} + + + ); + })} + )}
@@ -1383,6 +1823,7 @@ function App() { {activeTab === 'auto-interface' && ( )} + {activeTab === 'auto-stage2-history' && ( + { + await Promise.all([refreshPapers(), refreshBrainstorms()]); + }} + /> + )} {activeTab === 'auto-final-answer-library' && ( )} @@ -1431,20 +1879,28 @@ function App() { )} {activeTab === 'boost-logs' && } - {/* Single Paper Writer Content - ONLY when section is expanded */} - {showSinglePaperWriter && singlePaperWriterActiveTab === 'aggregator-interface' && ( - + {activeTab === 'aggregator-interface' && ( + )} - {showSinglePaperWriter && singlePaperWriterActiveTab === 'aggregator-settings' && ( + {activeTab === 'aggregator-settings' && ( )} - {showSinglePaperWriter && singlePaperWriterActiveTab === 'aggregator-logs' && } - {showSinglePaperWriter && singlePaperWriterActiveTab === 'aggregator-results' && } + {activeTab === 'aggregator-logs' && } + {activeTab === 'aggregator-results' && } - {showSinglePaperWriter && singlePaperWriterActiveTab === 'compiler-interface' && } - {showSinglePaperWriter && singlePaperWriterActiveTab === 'compiler-settings' && } - {showSinglePaperWriter && singlePaperWriterActiveTab === 'compiler-logs' && } - {showSinglePaperWriter && singlePaperWriterActiveTab === 'compiler-live-paper' && } + {activeTab === 'compiler-interface' && ( + + )} + {activeTab === 'compiler-settings' && } + {activeTab === 'compiler-logs' && } + {activeTab === 'compiler-live-paper' && }
@@ -1469,32 +1925,62 @@ function App() {
e.stopPropagation()} />
-

- In-Development Program Disclaimer +

+ Disclaimer & Quickstart

-

- Disclaimer: This program is a prototype super intelligence and is actively in development. MOTO operates by forcing your selected AI to attempt to output novel solutions toward your user prompt. Quality, correctness or any other aspects of a given solution are not guaranteed and should be examined with care and scrutiny. MOTO is not meant to produce a single paper, the first paper may lack in quality, MOTO is intended to generate many papers and improve with each completely new paper, best results show after 10+ papers. - Monitor the harness, logs and API keys for infinite loops, wasted API calls, and any other bugs. The paper text rendering system is experimental—display issues are not reflective of paper quality. If formatting appears messy, try a 3rd-party LaTeX renderer or copy the raw text into another LLM chat for verification. -

-

- QUICKSTART: (Optional) Load your Nomic embedding agent on LM STUDIO, or use an OpenRouter API key-only instead of LM STUDIO and go straight to picking your models, and then start the program - expect it to run for at the VERY LEAST hours to days once you hit run. You must leave your PC on and awake during runtime. -

-

- Please report all bugs and issues to project the repo at GitHub. -

-

- Trouble shoot and modify this program easily using the code's specialized rules for AIs and Cursor.com's agentic code editing app - no programming experience required! +

+ QUICKSTART: In LM Studio, load the embedding model nomic-ai/nomic-embed-text-v1.5 by Nomic AI (optional but recommended), or use only an OpenRouter API key instead of LM Studio. You must leave your PC on and awake during runtime, the program will often run for days without interruption.

+
+

+ Legal Disclaimer +

+

+ MOTO is an experimental prototype system and remains under active development. It directs selected AI models to generate novel solution attempts in response to your prompt. Outputs may be incorrect, incomplete, misleading, fabricated, poorly reasoned, or otherwise unsuitable for reliance without independent review, especially for high-stakes, academic, financial, legal, medical, engineering, or operational use. +
+
+ This software and all generated content are provided as-is and at your own risk. By using MOTO, you acknowledge that you are solely responsible for reviewing, validating, and deciding how to use any output, and that the developers, operators, and contributors are not responsible or liable for incorrect solutions, hallucinations, omissions, formatting issues, infinite loops, wasted API calls, model or provider failures, data loss, third-party charges, or any direct or indirect loss, damage, cost, or liability resulting from use of the program or its outputs. +

+
)} + + {/* Boost Control Modal */} setShowOpenRouterKeyModal(false)} - onKeySet={(key) => { - setHasOpenRouterKey(true); - console.log('OpenRouter API key set successfully'); - }} + onClose={handleCloseOpenRouterKeyModal} + onKeySet={handleOpenRouterKeySet} reason={openRouterKeyReason} /> @@ -1527,6 +2010,19 @@ function App() { onClickNotification={handleClickNotification} /> + {/* Credit Exhaustion Notification Stack - Persists until user dismisses */} + setCreditExhaustionNotifications([])} + /> + + {/* Hung Connection Notification Stack - Persists until user dismisses */} + + {/* Critique Modal - Opens when notification is clicked */} {showCritiqueModal && selectedCritiquePaper && ( ℹ️ - About M.O.T.O. + Read More About MOTO ASI MOTO News and Updates + + Purchase a Custom ASI Program + + + + Star Us on GitHub for More ASI Programs +
diff --git a/frontend/src/components/BoostControlModal.css b/frontend/src/components/BoostControlModal.css index 8c296bb..fbb5939 100644 --- a/frontend/src/components/BoostControlModal.css +++ b/frontend/src/components/BoostControlModal.css @@ -4,23 +4,24 @@ left: 0; right: 0; bottom: 0; - background: rgba(0, 0, 0, 0.7); + background: rgba(0, 0, 0, 0.65); display: flex; align-items: center; justify-content: center; z-index: 2000; + backdrop-filter: blur(4px); } .boost-modal { - background: #1a1a1a; - border: 1px solid #333; - border-radius: 8px; + background: var(--surface-2, #1a1a22); + border: 1px solid var(--border-subtle, rgba(255,255,255,0.07)); + border-radius: var(--radius-lg, 14px); width: 90%; max-width: 600px; max-height: 90vh; display: flex; flex-direction: column; - box-shadow: 0 10px 40px rgba(0, 0, 0, 0.5); + box-shadow: var(--shadow-lg, 0 10px 40px rgba(0, 0, 0, 0.5)); } .modal-header { @@ -28,20 +29,21 @@ justify-content: space-between; align-items: center; padding: 1.5rem; - border-bottom: 1px solid #333; - background: #0f0f0f; + border-bottom: 1px solid var(--border-subtle, rgba(255,255,255,0.07)); + background: var(--surface-0, #0b0b0f); + border-radius: var(--radius-lg, 14px) var(--radius-lg, 14px) 0 0; } .modal-header h2 { margin: 0; font-size: 1.5rem; - color: #ffd700; + color: var(--gold, #1eff1c); } .close-btn { background: none; border: none; - color: #888; + color: var(--text-muted, #666); font-size: 2rem; cursor: pointer; padding: 0; @@ -54,7 +56,7 @@ } .close-btn:hover { - color: #fff; + color: var(--text-primary, #e0e0e0); } .modal-body { @@ -76,14 +78,14 @@ } .boost-status-banner.enabled { - background: linear-gradient(135deg, #ffd700 0%, #ffed4e 100%); + background: linear-gradient(135deg, #1eff1c 0%, #7dff6f 100%); color: #000; } .boost-status-banner.disabled { - background: #2a2a2a; - color: #888; - border: 1px solid #444; + background: var(--surface-3, #22222c); + color: var(--text-muted, #666); + border: 1px solid 
var(--border-default, rgba(255,255,255,0.1)); } .boost-model { @@ -108,41 +110,42 @@ border-radius: 4px; } -.form-group { +.boost-form-group { margin-bottom: 1.5rem; } -.form-group label { +.boost-form-group label { display: block; margin-bottom: 0.5rem; font-weight: 600; color: #fff; } -.form-group input, -.form-group select { +.boost-form-group input, +.boost-form-group select { width: 100%; padding: 0.75rem; - background: #2a2a2a; - border: 1px solid #444; - border-radius: 4px; - color: #fff; + background: var(--surface-3, #22222c); + border: 1px solid var(--border-default, rgba(255,255,255,0.1)); + border-radius: var(--radius-sm, 6px); + color: var(--text-primary, #e0e0e0); font-size: 1rem; } -.form-group input:focus, -.form-group select:focus { +.boost-form-group input:focus, +.boost-form-group select:focus { outline: none; - border-color: #ffd700; + border-color: var(--gold, #1eff1c); + box-shadow: 0 0 0 3px var(--gold-glow, rgba(30, 255, 28, 0.12)); } -.form-group input:disabled, -.form-group select:disabled { +.boost-form-group input:disabled, +.boost-form-group select:disabled { opacity: 0.5; cursor: not-allowed; } -.form-group small { +.boost-form-group small { display: block; margin-top: 0.5rem; color: #888; @@ -155,13 +158,13 @@ gap: 1rem; } -.button-group { +.boost-button-group { display: flex; gap: 0.75rem; margin-bottom: 1.5rem; } -.button-group button { +.boost-button-group button { flex: 1; } @@ -185,23 +188,23 @@ } .info-box { - background: #242424; - border: 1px solid #444; - border-radius: 6px; + background: var(--surface-3, #22222c); + border: 1px solid var(--border-default, rgba(255,255,255,0.1)); + border-radius: var(--radius-sm, 6px); padding: 1rem; margin-top: 1.5rem; } .info-box h4 { margin: 0 0 0.75rem 0; - color: #4CAF50; + color: var(--accent-green, #4CAF50); font-size: 1rem; } .info-box ul { margin: 0; padding-left: 1.5rem; - color: #aaa; + color: var(--text-secondary, #9d9db0); } .info-box li { @@ -214,8 +217,9 @@ justify-content: 
flex-end; gap: 0.75rem; padding: 1.5rem; - border-top: 1px solid #333; - background: #0f0f0f; + border-top: 1px solid var(--border-subtle, rgba(255,255,255,0.07)); + background: var(--surface-0, #0b0b0f); + border-radius: 0 0 var(--radius-lg, 14px) var(--radius-lg, 14px); } button { @@ -234,23 +238,23 @@ button:disabled { } button.primary { - background: linear-gradient(135deg, #ffd700 0%, #ffed4e 100%); + background: linear-gradient(135deg, #1eff1c 0%, #7dff6f 100%); color: #000; } button.primary:hover:not(:disabled) { transform: translateY(-1px); - box-shadow: 0 4px 12px rgba(255, 215, 0, 0.3); + box-shadow: 0 4px 12px rgba(30, 255, 28, 0.3); } button.secondary { - background: #2a2a2a; - color: #fff; - border: 1px solid #444; + background: var(--surface-3, #22222c); + color: var(--text-primary, #e0e0e0); + border: 1px solid var(--border-default, rgba(255,255,255,0.1)); } button.secondary:hover:not(:disabled) { - background: #333; + background: var(--surface-4, #2a2a36); } button.danger { diff --git a/frontend/src/components/BoostControlModal.jsx b/frontend/src/components/BoostControlModal.jsx index 14371f0..62f736d 100644 --- a/frontend/src/components/BoostControlModal.jsx +++ b/frontend/src/components/BoostControlModal.jsx @@ -1,5 +1,5 @@ import React, { useState, useEffect } from 'react'; -import { api, boostAPI } from '../services/api'; +import { boostAPI, openRouterAPI } from '../services/api'; import './BoostControlModal.css'; export default function BoostControlModal({ isOpen, onClose }) { @@ -17,49 +17,21 @@ export default function BoostControlModal({ isOpen, onClose }) { const [success, setSuccess] = useState(''); const [boostStatus, setBoostStatus] = useState(null); const [freeOnly, setFreeOnly] = useState(false); + const [hasGlobalKey, setHasGlobalKey] = useState(false); - // Load saved API key from localStorage - useEffect(() => { - const savedKey = localStorage.getItem('openrouter_api_key'); - if (savedKey) { - setApiKey(savedKey); - } - }, []); - - 
// Fetch boost status - useEffect(() => { - if (isOpen) { - fetchBoostStatus(); - } - }, [isOpen]); + const hasAvailableKey = Boolean(apiKey.trim() || hasGlobalKey); - const fetchBoostStatus = async () => { - try { - const response = await boostAPI.getStatus(); - if (response.status) { - setBoostStatus(response.status); - if (response.status.enabled) { - setBoostModel(response.status.model_id); - setSelectedProvider(response.status.provider || ''); - setContextWindow(response.status.context_window); - setMaxOutputTokens(response.status.max_output_tokens); - } - } - } catch (error) { - console.error('Failed to fetch boost status:', error); - } - }; - - // Fetch providers when model is selected - const fetchProviders = async (modelId) => { - if (!apiKey || !modelId) { + const fetchProviders = async (modelId, keyOverride = undefined) => { + if (!modelId) { setProviders([]); return; } + const effectiveKey = keyOverride === undefined ? apiKey.trim() : keyOverride; + setLoadingProviders(true); try { - const response = await boostAPI.getModelProviders(apiKey, modelId); + const response = await boostAPI.getModelProviders(effectiveKey || null, modelId); if (response.providers) { setProviders(response.providers); } else { @@ -73,6 +45,30 @@ export default function BoostControlModal({ isOpen, onClose }) { } }; + const fetchBoostStatus = async (keyOverride = undefined) => { + const effectiveKey = keyOverride === undefined ? 
apiKey.trim() : keyOverride; + + try { + const response = await boostAPI.getStatus(); + if (response.status) { + setBoostStatus(response.status); + if (response.status.enabled) { + setBoostModel(response.status.model_id); + setSelectedProvider(response.status.provider || ''); + setContextWindow(response.status.context_window); + setMaxOutputTokens(response.status.max_output_tokens); + if (response.status.model_id) { + await fetchProviders(response.status.model_id, effectiveKey); + } + } else { + setProviders([]); + } + } + } catch (error) { + console.error('Failed to fetch boost status:', error); + } + }; + // Handle model selection change const handleModelChange = (modelId) => { setBoostModel(modelId); @@ -84,53 +80,96 @@ export default function BoostControlModal({ isOpen, onClose }) { } }; - const fetchModels = async (freeFilter = freeOnly) => { - if (!apiKey) { - setError('Please enter an API key first'); - return; - } + const fetchModels = async ( + freeFilter = freeOnly, + { silent = false, keyOverride = undefined } = {} + ) => { + const effectiveKey = keyOverride === undefined ? apiKey.trim() : keyOverride; setLoading(true); - setError(''); + if (!silent) { + setError(''); + setSuccess(''); + } try { - const response = await boostAPI.getOpenRouterModels(apiKey); + const response = await boostAPI.getOpenRouterModels(effectiveKey || null); if (response.models) { - // Filter for free models only if enabled - const filtered = freeFilter + const filtered = freeFilter ? response.models.filter(model => model.pricing && model.pricing.prompt === '0' && model.pricing.completion === '0') : response.models; setModels(filtered); - setSuccess(`Models loaded successfully (${filtered.length} ${freeFilter ? 'free ' : ''}models)`); + if (!silent) { + setSuccess(`Models loaded successfully (${filtered.length} ${freeFilter ? 
'free ' : ''}models)`); + } } } catch (error) { - setError(error.message || 'Failed to fetch models'); + if (!silent) { + setError(error.message || 'Failed to fetch models'); + } } finally { setLoading(false); } }; + useEffect(() => { + if (!isOpen) { + return; + } + + const initializeModal = async () => { + const savedKey = (localStorage.getItem('openrouter_api_key') || '').trim(); + setApiKey(savedKey); + setError(''); + setSuccess(''); + + let useGlobalKey = false; + try { + const keyStatus = await openRouterAPI.getApiKeyStatus(); + useGlobalKey = Boolean(keyStatus.has_key); + setHasGlobalKey(useGlobalKey); + } catch (error) { + console.error('Failed to check OpenRouter key status for boost modal:', error); + setHasGlobalKey(false); + } + + const preferredKey = useGlobalKey ? null : savedKey; + await fetchBoostStatus(preferredKey); + + if (useGlobalKey || savedKey) { + await fetchModels(freeOnly, { silent: true, keyOverride: preferredKey }); + } else { + setModels([]); + } + }; + + initializeModal(); + }, [isOpen]); + // Refetch models when free-only toggle changes useEffect(() => { - if (apiKey && models.length > 0) { - fetchModels(freeOnly); + if (isOpen && hasAvailableKey && models.length > 0) { + fetchModels(freeOnly, { silent: true }); } }, [freeOnly]); const testConnection = async () => { - if (!apiKey) { - setError('Please enter an API key'); + if (!hasAvailableKey) { + setError('Please enter an API key or use an active OpenRouter key'); return; } + const effectiveKey = apiKey.trim() || null; + const usingGlobalKey = !apiKey.trim() && hasGlobalKey; + setTesting(true); setError(''); setSuccess(''); try { - const response = await boostAPI.getOpenRouterModels(apiKey); + const response = await boostAPI.getOpenRouterModels(effectiveKey); if (response.models && response.models.length > 0) { - setSuccess(`✓ Connected successfully! Found ${response.models.length} models.`); + setSuccess(`✓ Connected successfully${usingGlobalKey ? 
' using the active OpenRouter key' : ''}! Found ${response.models.length} models.`); setModels(response.models); } else { setError('Connected but no models found'); @@ -143,11 +182,13 @@ export default function BoostControlModal({ isOpen, onClose }) { }; const enableBoost = async () => { - if (!apiKey || !boostModel) { - setError('Please enter API key and select a model'); + if (!boostModel) { + setError('Please select a model'); return; } + const trimmedApiKey = apiKey.trim(); + setLoading(true); setError(''); setSuccess(''); @@ -155,7 +196,7 @@ export default function BoostControlModal({ isOpen, onClose }) { try { const config = { enabled: true, - openrouter_api_key: apiKey, + openrouter_api_key: trimmedApiKey, boost_model_id: boostModel, boost_provider: selectedProvider || null, boost_context_window: contextWindow, @@ -171,8 +212,9 @@ export default function BoostControlModal({ isOpen, onClose }) { response = await boostAPI.updateModel(config); if (response.success) { - // Save API key to localStorage - localStorage.setItem('openrouter_api_key', apiKey); + if (trimmedApiKey) { + localStorage.setItem('openrouter_api_key', trimmedApiKey); + } setSuccess(`✓ Boost model updated! State preserved: ${response.preserved_state.boost_next_count} next calls`); await fetchBoostStatus(); @@ -185,8 +227,9 @@ export default function BoostControlModal({ isOpen, onClose }) { response = await boostAPI.enable(config); if (response.success) { - // Save API key to localStorage - localStorage.setItem('openrouter_api_key', apiKey); + if (trimmedApiKey) { + localStorage.setItem('openrouter_api_key', trimmedApiKey); + } setSuccess('✓ Boost enabled successfully!'); await fetchBoostStatus(); @@ -254,7 +297,7 @@ export default function BoostControlModal({ isOpen, onClose }) {
)} -
+
- Your API key is stored locally and never sent to our servers + Leave this blank to reuse the active OpenRouter key, or paste a different key just for boost.
-
+
-
+
-
+
-
+
Click tasks in the MOTO Workflow panel to toggle boost
  • Boosted tasks use your OpenRouter model instead of LM Studio
  • If credits run out, system falls back to LM Studio automatically
  • -
  • You can continuously select which tasks use the boost
  • +
  • You can toggle which tasks use the boost at any time
  • @@ -394,7 +437,7 @@ export default function BoostControlModal({ isOpen, onClose }) { <> +
    +
    + ); +} + +function CreditExhaustionNotification({ notification, onDismiss }) { + const [isHovered, setIsHovered] = React.useState(false); + const [isExiting, setIsExiting] = React.useState(false); + + const handleDismiss = (e) => { + e.stopPropagation(); + setIsExiting(true); + setTimeout(() => { + onDismiss(notification.id); + }, 300); + }; + + const roleLabel = notification.role_id + ? notification.role_id.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()) + : 'Unknown Role'; + + const isNoFallback = notification.reason === 'no_fallback_configured'; + + return ( +
    setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + style={{ + width: '320px', + minHeight: '90px', + background: `linear-gradient(135deg, ${isHovered ? 'rgba(180, 30, 30, 0.97)' : 'rgba(60, 15, 15, 0.96)'}, ${isHovered ? 'rgba(140, 20, 20, 0.97)' : 'rgba(40, 10, 10, 0.96)'})`, + backdropFilter: 'blur(8px)', + borderRadius: '12px', + padding: '14px', + boxShadow: isHovered + ? '0 20px 40px -12px rgba(231, 76, 60, 0.6), 0 0 0 1px rgba(231, 76, 60, 0.5)' + : '0 10px 30px -12px rgba(0, 0, 0, 0.8), 0 0 0 1px rgba(231, 76, 60, 0.4)', + border: `1px solid ${isHovered ? 'rgba(231, 76, 60, 0.7)' : 'rgba(231, 76, 60, 0.5)'}`, + transition: 'all 0.3s cubic-bezier(0.4, 0, 0.2, 1)', + transform: isExiting + ? 'translateX(-360px) scale(0.8)' + : `scale(${isHovered ? 1.02 : 1})`, + opacity: isExiting ? 0 : 1, + pointerEvents: 'auto', + animation: isExiting ? 'none' : 'creditSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1)', + }} + > + {/* Header */} +
    +
    +
    + +
    +
    +
    + Credits Exhausted +
    +
    + OpenRouter +
    +
    +
    + + {/* Dismiss button */} + +
    + + {/* Role info */} +
    + {roleLabel} +
    + + {/* Message */} +
    + {isNoFallback + ? 'No LM Studio fallback configured. This role has stopped. Configure a fallback model or add credits.' + : notification.fallback_model + ? `Fell back to LM Studio model: ${notification.fallback_model}` + : (notification.message || 'OpenRouter credits have been exhausted for this role.')} +
    + + +
    + ); +} diff --git a/frontend/src/components/CritiqueNotificationStack.jsx b/frontend/src/components/CritiqueNotificationStack.jsx index 1237003..01e5f31 100644 --- a/frontend/src/components/CritiqueNotificationStack.jsx +++ b/frontend/src/components/CritiqueNotificationStack.jsx @@ -18,9 +18,9 @@ const IconStar = ({ className }) => ( * Get color classes based on average rating */ function getRatingColor(rating) { - if (rating >= 8) return { text: 'text-emerald-400', bg: 'bg-emerald-500', gradient: 'from-emerald-600 to-emerald-500' }; - if (rating >= 6.25) return { text: 'text-blue-400', bg: 'bg-blue-500', gradient: 'from-blue-600 to-blue-500' }; - return { text: 'text-gray-400', bg: 'bg-gray-500', gradient: 'from-gray-600 to-gray-500' }; + if (rating >= 8) return { text: '#34d399', bg: '#10b981' }; + if (rating >= 6.25) return { text: '#60a5fa', bg: '#3b82f6' }; + return { text: '#9ca3af', bg: '#6b7280' }; } /** @@ -98,14 +98,14 @@ function CritiqueNotification({ notification, index, onDismiss, onClickNotificat style={{ width: '280px', minHeight: '80px', - background: `linear-gradient(135deg, ${isHovered ? 'rgba(88, 28, 135, 0.95)' : 'rgba(26, 26, 46, 0.95)'}, ${isHovered ? 'rgba(30, 58, 138, 0.95)' : 'rgba(17, 24, 39, 0.95)'})`, + background: `linear-gradient(135deg, ${isHovered ? 'rgba(20, 83, 45, 0.95)' : 'rgba(26, 26, 46, 0.95)'}, ${isHovered ? 'rgba(15, 60, 15, 0.95)' : 'rgba(17, 24, 39, 0.95)'})`, backdropFilter: 'blur(8px)', borderRadius: '12px', padding: '12px', boxShadow: isHovered - ? '0 20px 40px -12px rgba(147, 51, 234, 0.6), 0 0 0 1px rgba(147, 51, 234, 0.5)' - : '0 10px 30px -12px rgba(0, 0, 0, 0.8), 0 0 0 1px rgba(147, 51, 234, 0.3)', - border: `1px solid ${isHovered ? 'rgba(147, 51, 234, 0.6)' : 'rgba(147, 51, 234, 0.4)'}`, + ? '0 20px 40px -12px rgba(30, 255, 28, 0.5), 0 0 0 1px rgba(30, 255, 28, 0.5)' + : '0 10px 30px -12px rgba(0, 0, 0, 0.8), 0 0 0 1px rgba(30, 255, 28, 0.3)', + border: `1px solid ${isHovered ? 
'rgba(30, 255, 28, 0.6)' : 'rgba(30, 255, 28, 0.4)'}`, cursor: 'pointer', transition: 'all 0.3s cubic-bezier(0.4, 0, 0.2, 1)', transform: isExiting @@ -122,20 +122,20 @@ function CritiqueNotification({ notification, index, onDismiss, onClickNotificat
    - +
    High Score
    -
    +
    {notification.average_rating.toFixed(1)}/10
    @@ -165,7 +165,7 @@ function CritiqueNotification({ notification, index, onDismiss, onClickNotificat e.target.style.color = '#9ca3af'; }} > - +
    @@ -191,7 +191,7 @@ function CritiqueNotification({ notification, index, onDismiss, onClickNotificat
    ( + + + + +); + +const IconClock = ({ style }) => ( + + + + +); + +/** + * Persistent notification stack for hung API connection alerts. + * Amber-themed, stays visible until the user explicitly dismisses each notification. + * + * Props: + * - notifications: Array of { id, role_id, model, provider, elapsed_minutes, message, timestamp } + * - onDismiss: (id) => void + */ +export default function HungConnectionNotificationStack({ notifications, onDismiss }) { + if (!notifications || notifications.length === 0) { + return null; + } + + return ( +
    + {notifications.map((notification) => ( + + ))} +
    + ); +} + +function HungConnectionNotification({ notification, onDismiss }) { + const [isHovered, setIsHovered] = React.useState(false); + const [isExiting, setIsExiting] = React.useState(false); + + const handleDismiss = (e) => { + e.stopPropagation(); + setIsExiting(true); + setTimeout(() => { + onDismiss(notification.id); + }, 300); + }; + + const modelLabel = notification.model || 'Unknown Model'; + const providerLabel = notification.provider || 'Unknown Provider'; + + return ( +
    setIsHovered(true)} + onMouseLeave={() => setIsHovered(false)} + style={{ + width: '320px', + minHeight: '90px', + background: `linear-gradient(135deg, ${isHovered ? 'rgba(180, 120, 20, 0.97)' : 'rgba(60, 40, 10, 0.96)'}, ${isHovered ? 'rgba(140, 90, 10, 0.97)' : 'rgba(40, 25, 5, 0.96)'})`, + backdropFilter: 'blur(8px)', + borderRadius: '12px', + padding: '14px', + boxShadow: isHovered + ? '0 20px 40px -12px rgba(255, 165, 0, 0.6), 0 0 0 1px rgba(255, 165, 0, 0.5)' + : '0 10px 30px -12px rgba(0, 0, 0, 0.8), 0 0 0 1px rgba(255, 165, 0, 0.4)', + border: `1px solid ${isHovered ? 'rgba(255, 165, 0, 0.7)' : 'rgba(255, 165, 0, 0.5)'}`, + transition: 'all 0.3s cubic-bezier(0.4, 0, 0.2, 1)', + transform: isExiting + ? 'translateX(-360px) scale(0.8)' + : `scale(${isHovered ? 1.02 : 1})`, + opacity: isExiting ? 0 : 1, + pointerEvents: 'auto', + animation: isExiting ? 'none' : 'hungSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1)', + }} + > + {/* Header */} +
    +
    +
    + +
    +
    +
    + Possible Hung Connection +
    +
    + {notification.elapsed_minutes}+ Minutes +
    +
    +
    + + +
    + + {/* Model info */} +
    + {modelLabel} via {providerLabel} +
    + + {/* Message */} +
    + Connection may be hung. Consider stopping and trying a different host/provider. +
    + + +
    + ); +} diff --git a/frontend/src/components/LatexRenderer.css b/frontend/src/components/LatexRenderer.css index 025f22a..5b163f2 100644 --- a/frontend/src/components/LatexRenderer.css +++ b/frontend/src/components/LatexRenderer.css @@ -10,28 +10,28 @@ align-items: center; justify-content: space-between; padding: 0.5rem 1rem; - background: #1a1a1a; - border-bottom: 1px solid #333; + background: var(--surface-2, #1a1a22); + border-bottom: 1px solid var(--border-subtle, rgba(255,255,255,0.07)); flex-shrink: 0; } .latex-toggle-buttons { display: flex; gap: 0.25rem; - background: #0f0f0f; + background: var(--surface-0, #0b0b0f); padding: 0.25rem; - border-radius: 6px; + border-radius: var(--radius-sm, 6px); } .latex-toggle-btn { padding: 0.5rem 1rem; background: transparent; border: none; - color: #999; + color: var(--text-secondary, #999); cursor: pointer; font-size: 0.85rem; font-weight: 500; - border-radius: 4px; + border-radius: var(--radius-sm, 6px); transition: all 0.2s ease; display: flex; align-items: center; @@ -39,33 +39,33 @@ } .latex-toggle-btn:hover { - color: #fff; - background: rgba(255, 255, 255, 0.1); + color: var(--text-primary, #fff); + background: rgba(255, 255, 255, 0.06); } .latex-toggle-btn.active { - background: #4CAF50; + background: var(--accent-green, #4CAF50); color: white; font-weight: 600; } .latex-indicator { font-size: 0.8rem; - color: #4CAF50; + color: var(--accent-green, #4CAF50); display: flex; align-items: center; gap: 0.25rem; } .latex-indicator.no-latex { - color: #666; + color: var(--text-muted, #666); } .latex-content-container { flex: 1; overflow-y: visible; padding: 3rem 4.5rem; - background: #0a0a0a; + background: var(--surface-1, #131318); min-height: 400px; } @@ -194,12 +194,12 @@ } .latex-theorem { - border-color: #ffd700; - background: rgba(255, 215, 0, 0.05); + border-color: #1eff1c; + background: rgba(30, 255, 28, 0.05); } .latex-theorem strong { - color: #ffd700; + color: #1eff1c; } .latex-lemma { @@ -326,11 +326,11 
@@ .latex-chapter { font-size: 2rem !important; - border-bottom: 3px solid #ffd700 !important; + border-bottom: 3px solid #1eff1c !important; padding-bottom: 1rem !important; margin-top: 3rem !important; margin-bottom: 1.5rem !important; - color: #ffd700 !important; + color: #1eff1c !important; } .latex-section { @@ -596,18 +596,18 @@ } .latex-content-container::-webkit-scrollbar-track { - background: #1a1a1a; + background: var(--surface-2, #1a1a22); border-radius: 5px; } .latex-content-container::-webkit-scrollbar-thumb { - background: #444; + background: var(--surface-4, #2a2a36); border-radius: 5px; - border: 2px solid #1a1a1a; + border: 2px solid var(--surface-2, #1a1a22); } .latex-content-container::-webkit-scrollbar-thumb:hover { - background: #555; + background: var(--border-strong, rgba(255,255,255,0.16)); } /* ================================ diff --git a/frontend/src/components/LatexRenderer.jsx b/frontend/src/components/LatexRenderer.jsx index 43fe6ce..f746ac5 100644 --- a/frontend/src/components/LatexRenderer.jsx +++ b/frontend/src/components/LatexRenderer.jsx @@ -1121,14 +1121,14 @@ const LatexRenderer = ({ }} title="Show rendered LaTeX" > - 📐 Rendered {isLargeDoc ? '' : '(Experimental)'} + Rendered View
    {viewMode === 'rendered' && chunks.length > 1 && ( diff --git a/frontend/src/components/OpenRouterApiKeyModal.jsx b/frontend/src/components/OpenRouterApiKeyModal.jsx index dd1414a..87d52a0 100644 --- a/frontend/src/components/OpenRouterApiKeyModal.jsx +++ b/frontend/src/components/OpenRouterApiKeyModal.jsx @@ -1,9 +1,10 @@ import React, { useState, useEffect } from 'react'; import { openRouterAPI } from '../services/api'; +import './settings-common.css'; /** * Modal for configuring the global OpenRouter API key. - * This key is used for per-role OpenRouter model selection (separate from boost). + * This key is used for per-role OpenRouter model selection and can also be reused by boost. * * Shows when: * 1. User clicks "Use OpenRouter" on any role but no API key is configured @@ -76,7 +77,7 @@ export default function OpenRouterApiKeyModal({ isOpen, onClose, onKeySet, reaso // Notify parent if (onKeySet) { - onKeySet(apiKey.trim()); + await onKeySet(apiKey.trim()); } onClose(); @@ -103,38 +104,29 @@ export default function OpenRouterApiKeyModal({ isOpen, onClose, onKeySet, reaso const reasonMessages = { setup: 'Configure your OpenRouter API key to use OpenRouter models for any role.', + startup_setup: 'Save your OpenRouter API key to unlock cloud models. MOTO will apply the recommended default profile immediately, and you can switch to your team profile or another default profile later in Settings.', lm_studio_unavailable: 'LM Studio is not available. Configure OpenRouter to continue.', no_key: 'An OpenRouter API key is required to use OpenRouter models.', }; return (
    e.target === e.currentTarget && onClose()} >
    -
    +

    OpenRouter API Key

    @@ -167,18 +159,12 @@ export default function OpenRouterApiKeyModal({ isOpen, onClose, onKeySet, reaso value={apiKey} onChange={(e) => setApiKey(e.target.value)} placeholder="sk-or-v1-..." + className="input-dark" style={{ - width: '100%', - padding: '0.75rem', - backgroundColor: '#0d0d1a', - border: '1px solid #333', - borderRadius: '6px', - color: '#fff', fontSize: '0.95rem', - boxSizing: 'border-box', }} /> - + Get your API key at{' '} {error}
    @@ -208,14 +188,8 @@ export default function OpenRouterApiKeyModal({ isOpen, onClose, onKeySet, reaso {/* Test Result */} {testResult && testResult.connected && ( -
    Connection successful! {testResult.model_count} models available.
    @@ -265,15 +239,10 @@ export default function OpenRouterApiKeyModal({ isOpen, onClose, onKeySet, reaso {apiKey && (
    diff --git a/frontend/src/components/OpenRouterPrivacyWarningModal.jsx b/frontend/src/components/OpenRouterPrivacyWarningModal.jsx index ecd08d1..a610ff0 100644 --- a/frontend/src/components/OpenRouterPrivacyWarningModal.jsx +++ b/frontend/src/components/OpenRouterPrivacyWarningModal.jsx @@ -1,4 +1,5 @@ import React from 'react'; +import './settings-common.css'; /** * Modal that displays OpenRouter privacy policy warning. @@ -21,20 +22,11 @@ function OpenRouterPrivacyWarningModal({ isOpen, onClose, errorData }) { return (
    diff --git a/frontend/src/components/PaperCritiqueModal.jsx b/frontend/src/components/PaperCritiqueModal.jsx index e86a445..fed01df 100644 --- a/frontend/src/components/PaperCritiqueModal.jsx +++ b/frontend/src/components/PaperCritiqueModal.jsx @@ -1,5 +1,6 @@ import React, { useState, useEffect } from 'react'; import { createPortal } from 'react-dom'; +import './critique-modal.css'; // Simple inline icon components const IconX = ({ className }) => ( @@ -39,54 +40,20 @@ const IconAlertCircle = ({ className }) => ( ); -/** - * Get color class based on rating value (1-10) - */ function getRatingColor(rating) { - if (rating >= 8) return 'text-emerald-400'; - if (rating >= 6) return 'text-blue-400'; - if (rating >= 4) return 'text-yellow-400'; - if (rating >= 2) return 'text-orange-400'; - return 'text-red-400'; + if (rating >= 8) return 'critique-color--emerald'; + if (rating >= 6) return 'critique-color--blue'; + if (rating >= 4) return 'critique-color--yellow'; + if (rating >= 2) return 'critique-color--orange'; + return 'critique-color--red'; } function getRatingBgColor(rating) { - if (rating >= 8) return 'bg-emerald-500'; - if (rating >= 6) return 'bg-blue-500'; - if (rating >= 4) return 'bg-yellow-500'; - if (rating >= 2) return 'bg-orange-500'; - return 'bg-red-500'; -} - -/** - * Rating display component with progress bar - */ -function RatingDisplay({ label, rating, feedback }) { - const percentage = (rating / 10) * 100; - - return ( -
    -
    - {label} - - {rating > 0 ? rating : '—'}/10 - -
    - - {/* Progress bar */} -
    -
    -
    - - {/* Feedback text */} - {feedback && ( -

    {feedback}

    - )} -
    - ); + if (rating >= 8) return 'critique-bg--emerald'; + if (rating >= 6) return 'critique-bg--blue'; + if (rating >= 4) return 'critique-bg--yellow'; + if (rating >= 2) return 'critique-bg--orange'; + return 'critique-bg--red'; } /** @@ -201,20 +168,6 @@ export default function PaperCritiqueModal({ const modalContent = (
    { // Close when clicking the backdrop if (e.target === e.currentTarget) { @@ -223,144 +176,64 @@ export default function PaperCritiqueModal({ }} >
    e.stopPropagation()} > {/* Header - Compact */} -
    -
    -
    - +
    +
    +
    +
    -

    Validator Critique

    -

    +

    Validator Critique

    +

    {paperTitle || 'Paper'}

    {/* Content - Scrollable */} -
    +
    {loading ? ( -
    -
    -
    -

    Loading critiques...

    +
    +
    +
    +

    Loading critiques...

    ) : error ? ( -
    -
    - +
    +
    +
    -

    Error

    -

    {error}

    +

    Error

    +

    {error}

    ) : selectedCritique ? ( -
    +
    {/* Critic Identity - Compact */} -
    -
    +
    +
    -

    Critique by

    -

    {selectedCritique.model_id}

    +

    Critique by

    +

    {selectedCritique.model_id}

    {selectedCritique.host_provider && ( -

    via {selectedCritique.host_provider}

    +

    via {selectedCritique.host_provider}

    )}
    -
    -
    - +
    +
    + {formatDate(selectedCritique.date)}
    @@ -368,7 +241,7 @@ export default function PaperCritiqueModal({
    {/* Ratings - Compact Grid */} -
    +
    @@ -376,24 +249,9 @@ export default function PaperCritiqueModal({ {/* Full Critique - Expanded to fill space */} {selectedCritique.full_critique && ( -
    -

    Full Critique

    -

    +

    +

    Full Critique

    +

    {selectedCritique.full_critique}

    @@ -401,45 +259,21 @@ export default function PaperCritiqueModal({ {/* History - Compact */} {critiques.length > 1 && ( -
    +
    {historyOpen && ( -
    +
    {critiques.map((critique, idx) => (
    - - {/* Keyframes for spinner animation */} -
    ); @@ -613,46 +358,25 @@ function CompactRating({ label, rating, feedback }) { const percentage = (rating / 10) * 100; return ( -
    -
    - {label} - +
    +
    + {label} + {rating > 0 ? rating : '—'}
    {/* Progress bar */} -
    +
    {/* Feedback text - full display */} {feedback && ( -

    {feedback}

    +

    {feedback}

    )}
    ); diff --git a/frontend/src/components/StartupProviderSetupModal.jsx b/frontend/src/components/StartupProviderSetupModal.jsx new file mode 100644 index 0000000..259c5ec --- /dev/null +++ b/frontend/src/components/StartupProviderSetupModal.jsx @@ -0,0 +1,163 @@ +import React from 'react'; +import './settings-common.css'; + +export default function StartupProviderSetupModal({ + isOpen, + lmStudioAvailable, + hasUsableLmStudioChatModel = false, + lmStudioModelCount = 0, + lmStudioError = '', + statusMessage = '', + isCheckingLmStudio = false, + onChooseOpenRouter, + onConfirmLmStudio, +}) { + if (!isOpen) return null; + + return ( +
    e.stopPropagation()} + > +
    +
    +

    + Choose Your Startup Setup +

    +
    + +

    + MOTO needs an OpenRouter API key or a running LM Studio server before you start. + The best experience is to use both: OpenRouter for cloud models and LM Studio for free, faster local RAG and embeddings. +

    + +
    + Highly recommended: install LM Studio even if you plan to use OpenRouter. LM Studio + gives MOTO free local embedding/RAG calls and noticeably faster retrieval than OpenRouter embeddings. +
    + +
    +
    +

    OpenRouter Setup

    +
      +
    1. Create or sign in to your account at openrouter.ai.
    2. +
    3. Generate an API key at openrouter.ai/keys.
    4. +
    5. Paste that key into MOTO. The recommended default profile will be applied right away.
    6. +
    + +
    + +
    +

    LM Studio Setup

    +
      +
    1. Install LM Studio from lmstudio.ai.
    2. +
    3. Enable Developer or Power User mode if needed, then open the server tab.
    4. +
    5. Load the embedding model nomic-ai/nomic-embed-text-v1.5.
    6. +
    7. Optionally load one or more local chat models, then start the local server on http://127.0.0.1:1234.
    8. +
    + +
    +
    + +
    + {lmStudioAvailable && hasUsableLmStudioChatModel + ? `LM Studio is currently detected with ${lmStudioModelCount} loaded model${lmStudioModelCount === 1 ? '' : 's'}, including a usable chat model.` + : lmStudioAvailable + ? 'LM Studio is running, but you still need at least one loaded chat model in addition to embeddings.' + : `LM Studio is not detected yet${lmStudioError ? `: ${lmStudioError}` : '.'}`} +
    + + {statusMessage && ( +
    + {statusMessage} +
    + )} + +

    + After setup, open Autonomous Model Selection & Settings to pick your saved team + profile or switch to any built-in default profile. +

    +
    +
    + ); +} diff --git a/frontend/src/components/TextFileUploader.css b/frontend/src/components/TextFileUploader.css index 0d33804..0d19392 100644 --- a/frontend/src/components/TextFileUploader.css +++ b/frontend/src/components/TextFileUploader.css @@ -8,15 +8,15 @@ /* Upload Button - Matches button.secondary pattern */ .text-upload-btn { - background: #666; /* Matches button.secondary from index.css line 345 */ + background: var(--surface-4, #2a2a36); color: white; - padding: 0.75rem 1.5rem; /* Standard button padding from index.css line 324 */ + padding: 0.75rem 1.5rem; border: none; - border-radius: 4px; + border-radius: var(--radius-sm, 6px); cursor: pointer; font-size: 0.95rem; font-weight: 600; - transition: background 0.2s; + transition: all var(--transition-fast, 0.15s); display: inline-flex; align-items: center; gap: 0.5rem; @@ -25,17 +25,17 @@ } .text-upload-btn:hover:not(:disabled) { - background: #777; /* Matches button.secondary:hover from index.css line 348 */ + filter: brightness(1.2); } .text-upload-btn:disabled { - background: #555; /* Matches button:disabled from index.css line 340 */ + background: var(--surface-3, #22222c); cursor: not-allowed; opacity: 0.7; } .text-upload-btn:focus { - outline: 2px solid #4CAF50; + outline: 2px solid var(--accent-green, #4CAF50); outline-offset: 2px; } diff --git a/frontend/src/components/WorkflowPanel.css b/frontend/src/components/WorkflowPanel.css index 20047f9..36f885f 100644 --- a/frontend/src/components/WorkflowPanel.css +++ b/frontend/src/components/WorkflowPanel.css @@ -4,12 +4,13 @@ top: 0; height: 100vh; width: 320px; - background: #1a1a1a; - border-left: 1px solid #333; + background: var(--surface-1); + border-left: 1px solid var(--border-subtle); display: flex; flex-direction: column; z-index: 1000; transition: width 0.15s ease; + box-shadow: -4px 0 20px rgba(0, 0, 0, 0.3); } .workflow-panel.collapsed { @@ -21,27 +22,27 @@ justify-content: space-between; align-items: center; padding: 1rem; - 
border-bottom: 1px solid #333; - background: #0f0f0f; + border-bottom: 1px solid var(--border-subtle); + background: var(--surface-0); } .workflow-header h3 { margin: 0; font-size: 1.1rem; - color: #4CAF50; + color: var(--accent-green); } .collapse-btn { background: none; border: none; - color: #888; + color: var(--text-muted); font-size: 1.2rem; cursor: pointer; padding: 0.25rem 0.5rem; } .collapse-btn:hover { - color: #fff; + color: var(--text-primary); } .workflow-panel.collapsed .workflow-header h3 { @@ -50,14 +51,14 @@ .workflow-mode { padding: 0.75rem 1rem; - background: #242424; - border-bottom: 1px solid #333; + background: var(--surface-2); + border-bottom: 1px solid var(--border-subtle); font-size: 0.9rem; - color: #aaa; + color: var(--text-secondary); } -.mode-badge { - color: #4CAF50; +.wf-mode-badge { + color: var(--accent-green); font-weight: 600; text-transform: capitalize; } @@ -65,8 +66,8 @@ /* Boost Controls */ .boost-controls { padding: 0.75rem 1rem; - background: #1f1f1f; - border-bottom: 1px solid #333; + background: var(--surface-1); + border-bottom: 1px solid var(--border-subtle); } .boost-section { @@ -80,7 +81,7 @@ .boost-label { display: block; font-size: 0.75rem; - color: #888; + color: var(--text-muted); margin-bottom: 0.5rem; text-transform: uppercase; letter-spacing: 0.5px; @@ -95,49 +96,49 @@ .boost-next-input { width: 60px; padding: 0.4rem 0.5rem; - background: #2a2a2a; - border: 1px solid #444; - border-radius: 4px; - color: #fff; + background: var(--surface-3); + border: 1px solid var(--border-default); + border-radius: var(--radius-sm); + color: var(--text-primary); font-size: 0.9rem; } .boost-next-input:focus { outline: none; - border-color: #ffd700; + border-color: var(--gold-dim); } .boost-next-input::placeholder { - color: #666; + color: var(--text-muted); } .boost-apply-btn { padding: 0.4rem 0.75rem; - background: #ffd700; + background: var(--gold); border: none; - border-radius: 4px; + border-radius: var(--radius-sm); color: 
#000; font-size: 0.8rem; font-weight: 600; cursor: pointer; - transition: background 0.2s; + transition: all var(--transition-fast); } .boost-apply-btn:hover:not(:disabled) { - background: #ffed4e; + background: #7dff6f; } .boost-apply-btn:disabled { - background: #555; - color: #888; + background: var(--surface-4); + color: var(--text-muted); cursor: not-allowed; } .boost-count-badge { - background: #ffd700; + background: var(--gold); color: #000; padding: 0.25rem 0.5rem; - border-radius: 10px; + border-radius: var(--radius-pill); font-size: 0.7rem; font-weight: 600; } @@ -150,34 +151,34 @@ .category-btn { padding: 0.3rem 0.6rem; - background: #2a2a2a; - border: 1px solid #444; - border-radius: 4px; - color: #aaa; + background: var(--surface-3); + border: 1px solid var(--border-default); + border-radius: var(--radius-sm); + color: var(--text-secondary); font-size: 0.75rem; cursor: pointer; - transition: all 0.2s; + transition: all var(--transition-fast); } .category-btn:hover { - background: #333; - border-color: #555; - color: #fff; + background: var(--surface-4); + border-color: var(--border-strong); + color: var(--text-primary); } .category-btn.active { - background: linear-gradient(135deg, #ffd700 0%, #ffed4e 100%); - border-color: #ffd700; + background: linear-gradient(135deg, var(--gold) 0%, #7dff6f 100%); + border-color: var(--gold); color: #000; font-weight: 600; } .boost-disabled-notice { padding: 0.75rem 1rem; - background: #1f1f1f; - border-bottom: 1px solid #333; + background: var(--surface-1); + border-bottom: 1px solid var(--border-subtle); font-size: 0.8rem; - color: #666; + color: var(--text-muted); text-align: center; font-style: italic; } @@ -191,32 +192,32 @@ .no-tasks { text-align: center; padding: 2rem 1rem; - color: #666; + color: var(--text-muted); font-style: italic; } .task-card { - background: #2a2a2a; - border: 1px solid #444; - border-radius: 6px; + background: var(--surface-3); + border: 1px solid var(--border-subtle); + border-radius: 
var(--radius-sm); padding: 0.75rem; margin-bottom: 0.5rem; display: grid; grid-template-columns: auto 1fr auto auto; gap: 0.75rem; align-items: center; - transition: all 0.2s ease; + transition: all var(--transition-fast); } .task-card:hover:not(.completed) { - background: #333; - border-color: #555; + background: var(--surface-4); + border-color: var(--border-default); transform: translateX(-2px); } .task-card.boosted { - background: linear-gradient(135deg, #ffd700 0%, #ffed4e 100%); - border-color: #ffd700; + background: linear-gradient(135deg, var(--gold) 0%, #7dff6f 100%); + border-color: var(--gold); color: #000; } @@ -242,15 +243,15 @@ } .task-card.completed { - background: #2d5f2d; - border-color: #4CAF50; + background: rgba(76, 175, 80, 0.1); + border-color: rgba(76, 175, 80, 0.3); opacity: 0.7; } .task-number { font-weight: 700; font-size: 0.85rem; - color: #888; + color: var(--text-muted); min-width: 30px; } @@ -277,7 +278,7 @@ .task-mode { font-size: 0.75rem; - color: #aaa; + color: var(--text-secondary); } .task-card.boosted .task-mode { @@ -318,10 +319,10 @@ .task-provider { font-size: 0.7rem; font-weight: 600; - color: #666; - background: #1a1a1a; + color: var(--text-muted); + background: var(--surface-2); padding: 0.25rem 0.5rem; - border-radius: 4px; + border-radius: var(--radius-sm); } .task-card.boosted .task-provider { @@ -331,8 +332,8 @@ .workflow-legend { padding: 0.75rem 1rem; - border-top: 1px solid #333; - background: #0f0f0f; + border-top: 1px solid var(--border-subtle); + background: var(--surface-0); display: grid; grid-template-columns: 1fr 1fr; gap: 0.5rem; @@ -343,22 +344,22 @@ display: flex; align-items: center; gap: 0.5rem; - color: #aaa; + color: var(--text-secondary); } .legend-color { width: 16px; height: 16px; - border-radius: 3px; - border: 1px solid #444; + border-radius: var(--radius-sm); + border: 1px solid var(--border-default); } .legend-color.default { - background: #2a2a2a; + background: var(--surface-3); } 
.legend-color.boosted { - background: linear-gradient(135deg, #ffd700 0%, #ffed4e 100%); + background: linear-gradient(135deg, var(--gold) 0%, #7dff6f 100%); } .legend-color.active { @@ -366,7 +367,143 @@ } .legend-color.completed { - background: #2d5f2d; + background: rgba(76, 175, 80, 0.25); +} + +/* Token Stats & Research Timer */ +.token-stats-section { + padding: 0.75rem 1rem; + background: var(--surface-0); + border-bottom: 1px solid var(--border-subtle); +} + +.research-timer { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 0.6rem; + padding-bottom: 0.6rem; + border-bottom: 1px solid var(--border-subtle); +} + +.timer-label { + font-size: 0.75rem; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.timer-value { + font-family: 'Courier New', Courier, monospace; + font-size: 1rem; + font-weight: 700; + color: var(--accent-green); +} + +.token-totals { + display: flex; + flex-direction: column; + gap: 0.25rem; + margin-bottom: 0.5rem; +} + +.token-row { + display: flex; + justify-content: space-between; + align-items: center; +} + +.token-label { + font-size: 0.75rem; + color: var(--text-muted); +} + +.token-value { + font-family: 'Courier New', Courier, monospace; + font-size: 0.85rem; + color: var(--text-secondary); +} + +.token-total-row { + border-top: 1px solid var(--border-subtle); + padding-top: 0.3rem; + margin-top: 0.15rem; +} + +.token-total-row .token-label { + color: var(--text-secondary); + font-weight: 600; +} + +.token-total-row .token-value { + color: var(--text-primary); + font-weight: 600; +} + +.per-model-section { + margin-top: 0.25rem; +} + +.per-model-toggle { + background: none; + border: none; + color: var(--text-muted); + font-size: 0.75rem; + cursor: pointer; + padding: 0.25rem 0; + transition: color var(--transition-fast); +} + +.per-model-toggle:hover { + color: var(--text-secondary); +} + +.per-model-list { + margin-top: 0.4rem; + display: flex; + 
flex-direction: column; + gap: 0.35rem; + max-height: 180px; + overflow-y: auto; +} + +.per-model-list::-webkit-scrollbar { + width: 4px; +} + +.per-model-list::-webkit-scrollbar-thumb { + background: #444; + border-radius: 2px; +} + +.model-row { + background: var(--surface-1); + border-radius: var(--radius-sm); + padding: 0.4rem 0.5rem; +} + +.model-name { + font-size: 0.7rem; + color: var(--text-secondary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + margin-bottom: 0.15rem; +} + +.model-tokens { + display: flex; + gap: 0.75rem; + font-family: 'Courier New', Courier, monospace; + font-size: 0.7rem; +} + +.model-in { + color: #6aa7e8; +} + +.model-out { + color: #e8a86a; } /* Scrollbar styling */ @@ -375,15 +512,15 @@ } .workflow-tasks::-webkit-scrollbar-track { - background: #1a1a1a; + background: var(--surface-1); } .workflow-tasks::-webkit-scrollbar-thumb { - background: #444; + background: var(--surface-4); border-radius: 3px; } .workflow-tasks::-webkit-scrollbar-thumb:hover { - background: #555; + background: var(--border-strong); } diff --git a/frontend/src/components/WorkflowPanel.jsx b/frontend/src/components/WorkflowPanel.jsx index dfc3640..efe0569 100644 --- a/frontend/src/components/WorkflowPanel.jsx +++ b/frontend/src/components/WorkflowPanel.jsx @@ -1,8 +1,17 @@ -import React, { useState, useEffect, useCallback } from 'react'; +import React, { useState, useEffect, useCallback, useRef } from 'react'; import { websocket } from '../services/websocket'; import { boostAPI, workflowAPI } from '../services/api'; import './WorkflowPanel.css'; +const formatNumber = (n) => n.toLocaleString(); + +const formatTime = (totalSeconds) => { + const h = Math.floor(totalSeconds / 3600); + const m = Math.floor((totalSeconds % 3600) / 60); + const s = Math.floor(totalSeconds % 60); + return `${String(h).padStart(2, '0')}h ${String(m).padStart(2, '0')}m ${String(s).padStart(2, '0')}s`; +}; + export default function WorkflowPanel({ isRunning }) { 
const [tasks, setTasks] = useState([]); const [collapsed, setCollapsed] = useState(false); @@ -15,6 +24,17 @@ export default function WorkflowPanel({ isRunning }) { const [availableCategories, setAvailableCategories] = useState([]); const [boostEnabled, setBoostEnabled] = useState(false); + // Token tracking & timer state + const [tokenStats, setTokenStats] = useState({ total_input: 0, total_output: 0, by_model: {}, elapsed_seconds: 0 }); + const [showPerModel, setShowPerModel] = useState(false); + const [localElapsed, setLocalElapsed] = useState(0); + const lastSyncRef = useRef(Date.now()); + + const expandPanel = useCallback(() => { + setCollapsed(false); + localStorage.setItem('workflow_panel_collapsed', 'false'); + }, []); + // Fetch boost status and categories when running const fetchBoostStatus = useCallback(async () => { try { @@ -45,6 +65,12 @@ export default function WorkflowPanel({ isRunning }) { return () => clearInterval(interval); }, [fetchBoostStatus]); + useEffect(() => { + if (boostEnabled) { + expandPanel(); + } + }, [boostEnabled, expandPanel]); + // Handle setting boost next count const handleSetBoostNextCount = async () => { const count = parseInt(boostNextInput, 10); @@ -73,6 +99,41 @@ export default function WorkflowPanel({ isRunning }) { } }; + // Token stats: initial fetch on mount and when isRunning changes + useEffect(() => { + const fetchTokenStats = async () => { + try { + const resp = await workflowAPI.getTokenStats(); + if (resp.success) { + setTokenStats(resp); + setLocalElapsed(resp.elapsed_seconds || 0); + lastSyncRef.current = Date.now(); + } + } catch { /* ignore */ } + }; + fetchTokenStats(); + }, [isRunning]); + + // Token stats: listen for real-time WebSocket updates + useEffect(() => { + const handleTokenUpdate = (data) => { + setTokenStats(data); + setLocalElapsed(data.elapsed_seconds || 0); + lastSyncRef.current = Date.now(); + }; + websocket.on('token_usage_updated', handleTokenUpdate); + return () => 
websocket.off('token_usage_updated', handleTokenUpdate); + }, []); + + // Local 1-second timer tick for smooth elapsed display + useEffect(() => { + if (!isRunning) return; + const interval = setInterval(() => { + setLocalElapsed(prev => prev + 1); + }, 1000); + return () => clearInterval(interval); + }, [isRunning]); + // Poll for workflow predictions when running useEffect(() => { if (!isRunning) { @@ -288,6 +349,7 @@ export default function WorkflowPanel({ isRunning }) { // NEW: Handle boost enabled/disabled const handleBoostEnabled = () => { setBoostEnabled(true); + expandPanel(); fetchBoostStatus(); }; @@ -318,7 +380,7 @@ export default function WorkflowPanel({ isRunning }) { websocket.off('boost_enabled', handleBoostEnabled); websocket.off('boost_disabled', handleBoostDisabled); }; - }, [isRunning, fetchBoostStatus]); + }, [isRunning, fetchBoostStatus, expandPanel]); const handleTaskClick = async (task) => { if (task.completed) return; // Can't toggle completed tasks @@ -379,7 +441,7 @@ export default function WorkflowPanel({ isRunning }) { {!collapsed && ( <>
    - Mode: {mode} + Mode: {mode}
    {/* BOOST CONTROLS - ETERNAL (always visible, even when boost not enabled) */} @@ -443,6 +505,55 @@ export default function WorkflowPanel({ isRunning }) { )}
    + {/* RESEARCH TIMER & TOKEN STATS */} +
    +
    + Elapsed + {formatTime(localElapsed)} +
    + +
    +
    + Input + {formatNumber(tokenStats.total_input)} +
    +
    + Output + {formatNumber(tokenStats.total_output)} +
    +
    + Total + {formatNumber(tokenStats.total_input + tokenStats.total_output)} +
    +
    + + {Object.keys(tokenStats.by_model || {}).length > 0 && ( +
    + + {showPerModel && ( +
    + {Object.entries(tokenStats.by_model) + .sort((a, b) => (b[1].input + b[1].output) - (a[1].input + a[1].output)) + .map(([modelId, usage]) => ( +
    +
    {modelId}
    +
    + In: {formatNumber(usage.input)} + Out: {formatNumber(usage.output)} +
    +
    + ))} +
    + )} +
    + )} +
    +
    {tasks.length === 0 ? (
    Loading workflow...
    diff --git a/frontend/src/components/aggregator/AggregatorInterface.jsx b/frontend/src/components/aggregator/AggregatorInterface.jsx index 9701ec4..343790d 100644 --- a/frontend/src/components/aggregator/AggregatorInterface.jsx +++ b/frontend/src/components/aggregator/AggregatorInterface.jsx @@ -1,8 +1,9 @@ import React, { useState, useEffect } from 'react'; import { api } from '../../services/api'; import TextFileUploader from '../TextFileUploader'; +import '../settings-common.css'; -export default function AggregatorInterface({ config, setConfig }) { +export default function AggregatorInterface({ config, setConfig, anyWorkflowRunning = false }) { const [isRunning, setIsRunning] = useState(false); const [status, setStatus] = useState(null); const [uploadedFiles, setUploadedFiles] = useState([]); @@ -49,6 +50,11 @@ export default function AggregatorInterface({ config, setConfig }) { }; const handleStart = async () => { + if (anyWorkflowRunning && !isRunning) { + alert('Another workflow is already running. Stop it before starting the Aggregator.'); + return; + } + if (!config.userPrompt.trim()) { alert('Please enter a user prompt'); return; @@ -97,7 +103,7 @@ export default function AggregatorInterface({ config, setConfig }) { setIsRunning(true); } catch (error) { console.error('Failed to start aggregator:', error); - alert('Failed to start aggregator. Check console for details.'); + alert(`Failed to start aggregator: ${error.details || error.message}`); } }; @@ -159,7 +165,9 @@ export default function AggregatorInterface({ config, setConfig }) {
    {!isRunning ? ( - + ) : ( )} @@ -179,7 +187,7 @@ export default function AggregatorInterface({ config, setConfig }) {
    Rejected
    -
    +
    {status.total_rejections}
    diff --git a/frontend/src/components/aggregator/AggregatorLogs.jsx b/frontend/src/components/aggregator/AggregatorLogs.jsx index 34746a0..c1eebec 100644 --- a/frontend/src/components/aggregator/AggregatorLogs.jsx +++ b/frontend/src/components/aggregator/AggregatorLogs.jsx @@ -1,6 +1,7 @@ import React, { useState, useEffect } from 'react'; import { websocket } from '../../services/websocket'; import { api } from '../../services/api'; +import '../settings-common.css'; export default function AggregatorLogs() { const [events, setEvents] = useState([]); @@ -213,16 +214,16 @@ export default function AggregatorLogs() { {recoveryStatus && recoveryStatus.in_recovery && (
    -

    +

    Model Recovery in Progress

    -
    +
    Model: {recoveryStatus.recovering_model}
    Stage: {recoveryStatus.recovery_stage}
    @@ -246,7 +247,7 @@ export default function AggregatorLogs() {
    = recoveryStatus.corruption_threshold ? '#f44336' : '#ff9800' }}> -
    {model}
    +
    {model}
    = recoveryStatus.corruption_threshold ? '#f44336' : '#ff9800' }}> Failures: {count}/{recoveryStatus.corruption_threshold} @@ -271,7 +272,7 @@ export default function AggregatorLogs() {
    Submissions: {submitter.total_submissions}
    Acceptances: {submitter.total_acceptances}
    -
    Consecutive Rejections: {submitter.consecutive_rejections}
    +
    Consecutive Rejections: {submitter.consecutive_rejections}
    ))} diff --git a/frontend/src/components/aggregator/AggregatorSettings.jsx b/frontend/src/components/aggregator/AggregatorSettings.jsx index ed5ca1d..a7baffb 100644 --- a/frontend/src/components/aggregator/AggregatorSettings.jsx +++ b/frontend/src/components/aggregator/AggregatorSettings.jsx @@ -1,5 +1,6 @@ import React, { useState, useEffect } from 'react'; import { api, openRouterAPI } from '../../services/api'; +import '../settings-common.css'; const DEFAULT_SUBMITTER_CONFIG = { submitterId: 1, @@ -294,22 +295,13 @@ export default function AggregatorSettings({ config, setConfig }) { return ( <> {/* Provider Toggle */} -
    - -
    +
    + +
    @@ -317,16 +309,7 @@ export default function AggregatorSettings({ config, setConfig }) { type="button" onClick={() => hasOpenRouterKey && onProviderChange('openrouter')} disabled={!hasOpenRouterKey} - style={{ - flex: 1, - padding: '0.5rem', - backgroundColor: provider === 'openrouter' ? '#6c5ce7' : '#333', - border: 'none', - borderRadius: '4px', - color: hasOpenRouterKey ? '#fff' : '#666', - cursor: hasOpenRouterKey ? 'pointer' : 'not-allowed', - fontSize: '0.8rem' - }} + className={`provider-toggle-btn${provider === 'openrouter' ? ' active-or' : ''}`} title={!hasOpenRouterKey ? 'Set OpenRouter API key first' : 'Use OpenRouter'} > OpenRouter @@ -335,12 +318,12 @@ export default function AggregatorSettings({ config, setConfig }) {
    {/* Model Selection */} -
    - +
    + onOpenrouterProviderChange(e.target.value || null)} - style={{ fontSize: '0.85rem' }} + className="select--sm" > {providers.map(p => ( @@ -378,21 +361,21 @@ export default function AggregatorSettings({ config, setConfig }) { {/* LM Studio Fallback (only for OpenRouter) */} {provider === 'openrouter' && ( -
    -