diff --git a/AGENTS.md b/AGENTS.md index c102442..e96d2a9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,5 +1,14 @@ # AGENTS.md + +## Communication Contract (Global) +- Follow `/Users/d/.codex/policies/communication/BigPictureReportingV1.md` for all user-facing updates. +- Use exact section labels from `BigPictureReportingV1.md` for default status/progress updates. +- Keep default updates beginner-friendly, big-picture, and low-noise. +- Keep technical details in internal artifacts unless explicitly requested by the user. +- Honor toggles literally: `simple mode`, `show receipts`, `tech mode`, `debug mode`. + + ## Project goal Build a personal, local-only RAG system for macOS with a CLI-first workflow. The system must remain simple, inspectable, and testable from day one. @@ -75,6 +84,16 @@ ruff check . --fix - When a dependency is optional or heavyweight, isolate it behind a module boundary - Treat `ingestion_runs` as SQLite-inspectable operational history unless a task explicitly adds a first-class CLI view for them +## Codex App Usage + +- Use Codex App Projects for repo-specific implementation, review, and verification in this checkout. +- Use a Worktree when changing retrieval behavior, storage schemas, CLI contracts, desktop wiring, or model-runtime boundaries. +- Use the in-app browser or Playwright for desktop UI and FastAPI-backed browser workflow checks. +- Use computer use only for GUI-only macOS/Tauri behavior that cannot be verified through tests, browser tooling, MCP, or CLI commands. +- Use artifacts for reusable evaluation notes, retrieval examples, screenshots, and handoff packets. +- Keep connectors read-first and task-scoped. Do not introduce cloud services, hosted databases, external API keys, or connector-backed app behavior unless explicitly requested. +- Keep `.codex/verify.commands` as the verification authority; Codex App tools add evidence but do not replace the required local gates. + ## Done criteria A task is done only when all of the following are true: @@ -83,3 +102,46 @@ A task is done only when all of the following are true: - Relevant tests were run, or the exact reason they were not run is stated - Docs or repo rules were updated when behavior or workflow changed - Assumptions, risks, and next steps were summarized + + +# Portfolio Context + +## What This Project Is + +GPT_RAG is an active local project in the /Users/d/Projects portfolio. + +## Current State + +Portfolio truth currently marks this project as `recent` with `boilerplate` context. Phase 104 recovered minimum-viable context so future sessions can resume without rediscovery. + +## Stack + +| Layer | Technology | +|-------|------------| +| Language | Python 3.11+ | +| CLI | Typer + Rich | +| Database | SQLite (FTS5) + LanceDB | +| Embeddings / inference | Ollama | +| Reranker | sentence-transformers (Qwen3-Reranker-4B) | +| Document parsing | pypdf, BeautifulSoup4 | +| Desktop shell | Tauri v2 + React + TypeScript | +| Desktop API | FastAPI + Uvicorn | +| Validation | Pydantic v2 | + +## How To Run + +```bash +rag init +rag ingest ~/Documents/my-notes +rag ask "What did I write about distributed systems?" +``` + +## Known Risks + +- This repo only has minimum-viable recovery context today; deeper handoff details may still live in the README and supporting docs. + +## Next Recommended Move + +Use this context plus the README and supporting docs to resume the next active task, then promote the repo beyond minimum-viable by capturing a dedicated handoff, roadmap, or discovery artifact. + + diff --git a/src/gpt_rag/cli.py b/src/gpt_rag/cli.py index 0ef61c6..5d684cc 100644 --- a/src/gpt_rag/cli.py +++ b/src/gpt_rag/cli.py @@ -3977,3 +3977,7 @@ def eval_answer_diff( def main() -> None: app() + + +if __name__ == "__main__": + main() diff --git a/tests/test_cli.py b/tests/test_cli.py index 0c7935d..eb3fa5b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from dataclasses import dataclass from pathlib import Path @@ -653,11 +654,15 @@ def fail_if_called(settings): assert saved_report["status"] == "status" +_ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-9;]*m") + + def test_reindex_vectors_status_rejects_mutating_flags() -> None: result = runner.invoke(app, ["reindex-vectors", "--status", "--batch-size", "2"]) assert result.exit_code == 2 - assert "--status cannot be combined" in result.output + stripped_output = _ANSI_ESCAPE_RE.sub("", result.output) + assert "--status cannot be combined" in stripped_output def test_reindex_vectors_status_does_not_create_state_on_fresh_home( @@ -1010,9 +1015,7 @@ def test_diff_command_compares_saved_trace_to_current_results(tmp_path: Path, mo reranker.scores_by_text[ "# Socket Timeout Guide\n\nSocket timeout troubleshooting and startup checks." ] = 0.7 - reranker.scores_by_text[ - "Socket Notes\n\nThe socket timeout happens during startup." - ] = 0.99 + reranker.scores_by_text["Socket Notes\n\nThe socket timeout happens during startup."] = 0.99 diff_result = runner.invoke( app, @@ -1114,9 +1117,7 @@ def test_ask_command_can_persist_trace_artifact(tmp_path: Path, monkeypatch) -> assert trace_payload["retrieval_snapshot"]["diversity"]["document_capped_count"] >= 0 assert trace_payload["retrieval_snapshot"]["diversity"]["unique_document_count"] >= 1 assert trace_payload["retrieval_results"] - assert ( - trace_payload["generated_answer"]["retrieval_summary"]["cited_chunk_count"] == 2 - ) + assert trace_payload["generated_answer"]["retrieval_summary"]["cited_chunk_count"] == 2 assert trace_payload["answer_context_diversity"]["used_chunk_count"] == 2 assert trace_payload["answer_context_diversity"]["unique_document_count"] == 2 assert trace_payload["generated_answer"]["citations"][0]["chunk_id"] > 0 @@ -1262,9 +1263,7 @@ def test_answer_diff_command_compares_ask_traces(tmp_path: Path, monkeypatch) -> reranker.scores_by_text[ "# Socket Timeout Guide\n\nSocket timeout troubleshooting and startup checks." ] = 0.7 - reranker.scores_by_text[ - "Socket Notes\n\nThe socket timeout happens during startup." - ] = 0.99 + reranker.scores_by_text["Socket Notes\n\nThe socket timeout happens during startup."] = 0.99 monkeypatch.setattr("gpt_rag.cli.build_generation_client", lambda settings: after_generator) after_result = runner.invoke( @@ -2397,18 +2396,21 @@ def test_trace_verify_reports_invalid_artifacts(tmp_path: Path, monkeypatch) -> issues_by_path = { Path(report["path"]).name: report["issues"] for report in payload["reports"] } - assert "could not read a JSON object" in issues_by_path[ - "20260101T000000Z-inspect-broken.json" - ] - assert "ask trace must contain generated_answer" in issues_by_path[ - "20260101T000100Z-ask-wrong.json" - ] - assert "ask trace must contain retrieval_snapshot" in issues_by_path[ - "20260101T000100Z-ask-wrong.json" - ] - assert "ask trace must contain retrieval_results" in issues_by_path[ - "20260101T000100Z-ask-wrong.json" - ] + assert ( + "could not read a JSON object" in issues_by_path["20260101T000000Z-inspect-broken.json"] + ) + assert ( + "ask trace must contain generated_answer" + in issues_by_path["20260101T000100Z-ask-wrong.json"] + ) + assert ( + "ask trace must contain retrieval_snapshot" + in issues_by_path["20260101T000100Z-ask-wrong.json"] + ) + assert ( + "ask trace must contain retrieval_results" + in issues_by_path["20260101T000100Z-ask-wrong.json"] + ) finally: load_settings.cache_clear()