Merge pull request #14 from saagpatel/chore/codex-scaffolding-and-main-guard

saagpatel · web-flow · commit 7a9c713520dc · 2026-05-10T21:40:22.000-07:00
chore: add Codex harness scaffolding and cli __main__ guard
diff --git a/AGENTS.md b/AGENTS.md
@@ -1,5 +1,14 @@
 # AGENTS.md
 
+<!-- comm-contract:start -->
+## Communication Contract (Global)
+- Follow `/Users/d/.codex/policies/communication/BigPictureReportingV1.md` for all user-facing updates.
+- Use exact section labels from `BigPictureReportingV1.md` for default status/progress updates.
+- Keep default updates beginner-friendly, big-picture, and low-noise.
+- Keep technical details in internal artifacts unless explicitly requested by the user.
+- Honor toggles literally: `simple mode`, `show receipts`, `tech mode`, `debug mode`.
+<!-- comm-contract:end -->
+
 ## Project goal
 
 Build a personal, local-only RAG system for macOS with a CLI-first workflow. The system must remain simple, inspectable, and testable from day one.
@@ -75,6 +84,16 @@ ruff check . --fix
 - When a dependency is optional or heavyweight, isolate it behind a module boundary
 - Treat `ingestion_runs` as SQLite-inspectable operational history unless a task explicitly adds a first-class CLI view for them
 
+## Codex App Usage
+
+- Use Codex App Projects for repo-specific implementation, review, and verification in this checkout.
+- Use a Worktree when changing retrieval behavior, storage schemas, CLI contracts, desktop wiring, or model-runtime boundaries.
+- Use the in-app browser or Playwright for desktop UI and FastAPI-backed browser workflow checks.
+- Reserve computer use for GUI-only macOS/Tauri behavior that cannot be verified through tests, browser tooling, MCP, or CLI commands.
+- Use artifacts for reusable evaluation notes, retrieval examples, screenshots, and handoff packets.
+- Keep connectors read-first and task-scoped. Do not introduce cloud services, hosted databases, external API keys, or connector-backed app behavior unless explicitly requested.
+- Keep `.codex/verify.commands` as the verification authority; Codex App tools add evidence but do not replace the required local gates.
+
 ## Done criteria
 
 A task is done only when all of the following are true:
@@ -83,3 +102,46 @@ A task is done only when all of the following are true:
 - Relevant tests were run, or the exact reason they were not run is stated
 - Docs or repo rules were updated when behavior or workflow changed
 - Assumptions, risks, and next steps were summarized
+
+<!-- portfolio-context:start -->
+# Portfolio Context
+
+## What This Project Is
+
+GPT_RAG is an active local project in the `/Users/d/Projects` portfolio.
+
+## Current State
+
+Portfolio truth currently marks this project as `recent` with `boilerplate` context. Phase 104 recovered minimum-viable context so future sessions can resume without rediscovery.
+
+## Stack
+
+| Layer | Technology |
+|-------|------------|
+| Language | Python 3.11+ |
+| CLI | Typer + Rich |
+| Database | SQLite (FTS5) + LanceDB |
+| Embeddings / inference | Ollama |
+| Reranker | sentence-transformers (Qwen3-Reranker-4B) |
+| Document parsing | pypdf, BeautifulSoup4 |
+| Desktop shell | Tauri v2 + React + TypeScript |
+| Desktop API | FastAPI + Uvicorn |
+| Validation | Pydantic v2 |
+
+## How To Run
+
+```bash
+rag init
+rag ingest ~/Documents/my-notes
+rag ask "What did I write about distributed systems?"
+```
+
+## Known Risks
+
+- This repo has only minimum-viable recovery context today; deeper handoff details may still live in the README and supporting docs.
+
+## Next Recommended Move
+
+Use this context plus the README and supporting docs to resume the next active task, then promote the repo beyond minimum-viable context by capturing a dedicated handoff, roadmap, or discovery artifact.
+
+<!-- portfolio-context:end -->
diff --git a/src/gpt_rag/cli.py b/src/gpt_rag/cli.py
@@ -3977,3 +3977,7 @@ def eval_answer_diff(
 
 def main() -> None:
     app()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import re
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -653,11 +654,15 @@ def fail_if_called(settings):
     assert saved_report["status"] == "status"
 
 
+_ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-9;]*m")
+
+
 def test_reindex_vectors_status_rejects_mutating_flags() -> None:
     result = runner.invoke(app, ["reindex-vectors", "--status", "--batch-size", "2"])
 
     assert result.exit_code == 2
-    assert "--status cannot be combined" in result.output
+    stripped_output = _ANSI_ESCAPE_RE.sub("", result.output)
+    assert "--status cannot be combined" in stripped_output
 
 
 def test_reindex_vectors_status_does_not_create_state_on_fresh_home(
@@ -1010,9 +1015,7 @@ def test_diff_command_compares_saved_trace_to_current_results(tmp_path: Path, mo
     reranker.scores_by_text[
         "# Socket Timeout Guide\n\nSocket timeout troubleshooting and startup checks."
     ] = 0.7
-    reranker.scores_by_text[
-        "Socket Notes\n\nThe socket timeout happens during startup."
-    ] = 0.99
+    reranker.scores_by_text["Socket Notes\n\nThe socket timeout happens during startup."] = 0.99
 
     diff_result = runner.invoke(
         app,
@@ -1114,9 +1117,7 @@ def test_ask_command_can_persist_trace_artifact(tmp_path: Path, monkeypatch) ->
     assert trace_payload["retrieval_snapshot"]["diversity"]["document_capped_count"] >= 0
     assert trace_payload["retrieval_snapshot"]["diversity"]["unique_document_count"] >= 1
     assert trace_payload["retrieval_results"]
-    assert (
-        trace_payload["generated_answer"]["retrieval_summary"]["cited_chunk_count"] == 2
-    )
+    assert trace_payload["generated_answer"]["retrieval_summary"]["cited_chunk_count"] == 2
     assert trace_payload["answer_context_diversity"]["used_chunk_count"] == 2
     assert trace_payload["answer_context_diversity"]["unique_document_count"] == 2
     assert trace_payload["generated_answer"]["citations"][0]["chunk_id"] > 0
@@ -1262,9 +1263,7 @@ def test_answer_diff_command_compares_ask_traces(tmp_path: Path, monkeypatch) ->
     reranker.scores_by_text[
         "# Socket Timeout Guide\n\nSocket timeout troubleshooting and startup checks."
     ] = 0.7
-    reranker.scores_by_text[
-        "Socket Notes\n\nThe socket timeout happens during startup."
-    ] = 0.99
+    reranker.scores_by_text["Socket Notes\n\nThe socket timeout happens during startup."] = 0.99
     monkeypatch.setattr("gpt_rag.cli.build_generation_client", lambda settings: after_generator)
 
     after_result = runner.invoke(
@@ -2397,18 +2396,21 @@ def test_trace_verify_reports_invalid_artifacts(tmp_path: Path, monkeypatch) ->
         issues_by_path = {
             Path(report["path"]).name: report["issues"] for report in payload["reports"]
         }
-        assert "could not read a JSON object" in issues_by_path[
-            "20260101T000000Z-inspect-broken.json"
-        ]
-        assert "ask trace must contain generated_answer" in issues_by_path[
-            "20260101T000100Z-ask-wrong.json"
-        ]
-        assert "ask trace must contain retrieval_snapshot" in issues_by_path[
-            "20260101T000100Z-ask-wrong.json"
-        ]
-        assert "ask trace must contain retrieval_results" in issues_by_path[
-            "20260101T000100Z-ask-wrong.json"
-        ]
+        assert (
+            "could not read a JSON object" in issues_by_path["20260101T000000Z-inspect-broken.json"]
+        )
+        assert (
+            "ask trace must contain generated_answer"
+            in issues_by_path["20260101T000100Z-ask-wrong.json"]
+        )
+        assert (
+            "ask trace must contain retrieval_snapshot"
+            in issues_by_path["20260101T000100Z-ask-wrong.json"]
+        )
+        assert (
+            "ask trace must contain retrieval_results"
+            in issues_by_path["20260101T000100Z-ask-wrong.json"]
+        )
     finally:
         load_settings.cache_clear()