nikivdev
diff --git a/‎docs/commands/ai.md‎
Lines changed: 99 additions & 1 deletion b/‎docs/commands/ai.md‎
Lines changed: 99 additions & 1 deletion
diff --git a/‎docs/flow-toml-spec.md‎
Lines changed: 13 additions & 2 deletions b/‎docs/flow-toml-spec.md‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎flow.toml‎
Lines changed: 20 additions & 0 deletions b/‎flow.toml‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎scripts/codex-flow-wrapper‎
Lines changed: 106 additions & 0 deletions b/‎scripts/codex-flow-wrapper‎
Lines changed: 106 additions & 0 deletions
@@ -83,6 +83,7 @@ f codex open "continue the deploy work"
 f codex open "resume latest"
 f codex open --path ~/work/example-project "what was I doing here"
 f codex resolve "https://linear.app/fl2024008/project/llm-proxy-v1-6cd0a041bd76/overview" --json
+f codex doctor --path ~/work/example-project
 ```
 
 Behavior:
@@ -94,6 +95,7 @@ Behavior:
 - otherwise: start a new session with the raw query and no extra wrapper text
 
 This keeps prompt cost flat unless Flow has a strong reason to recover or unroll context.
+Use `f codex doctor` to confirm whether wrapper transport, runtime skills, and context budgets are actually active for the current repo.
 
 ### Optional `flow.toml` resolver config
 
@@ -102,11 +104,13 @@ You can teach `f codex open` and `f codex resolve` to unroll repo-specific refer
 ```toml
 [codex]
 auto_resolve_references = true
+prompt_context_budget_chars = 900
+max_resolved_references = 1
 
 [[codex.reference_resolver]]
 name = "linear"
 match = ["https://linear.app/*/issue/*", "https://linear.app/*/project/*"]
-command = "forge linear inspect {{ref}} --json"
+command = "my-linear-tool inspect {{ref}} --json"
 inject_as = "linear"
 ```
 
@@ -116,6 +120,100 @@ Notes:
 - `{{ref}}`, `{{query}}`, and `{{cwd}}` are available in resolver commands
 - built-in Linear URL parsing works even without a custom resolver
 - resolver output is compacted before prompt injection
+- `prompt_context_budget_chars` hard-caps injected context before your request is appended
+- `max_resolved_references` prevents broad unrolling from bloating one turn
+
+### Optional runtime skill transport
+
+Flow can also materialize tiny per-launch runtime skills for current upstream Codex without forking Codex.
+
+Enable it with:
+
+```toml
+[codex]
+runtime_skills = true
+
+[options]
+codex_bin = "~/code/flow/scripts/codex-flow-wrapper"
+```
+
+Current behavior (first iteration):
+
+- `f codex open "write plan"` can attach a tiny plan-writing runtime skill
+- the runtime skill is exposed only for the launched Codex process
+- Flow keeps the generated runtime state under `~/.config/flow/codex/runtime`
+
+Inspect or clear runtime state:
+
+```bash
+f codex runtime show
+f codex runtime clear
+f codex doctor
+```
+
+Built-in plan writer:
+
+```bash
+cat <<'EOF' | f codex runtime write-plan --title "Example Plan"
+# Example Plan
+
+- item
+EOF
+```
+
+### Skill eval and background refresh
+
+Flow can learn which runtime skills are actually worth injecting from local
+Codex usage history without replaying Codex in the hot path.
+
+Useful commands:
+
+```bash
+f codex skill-eval show --path ~/work/example-project
+f codex skill-eval run --path ~/work/example-project
+f codex skill-eval cron --limit 400 --max-targets 12 --within-hours 168
+f codex skill-source list --path ~/work/example-project
+f codex skill-source sync --path ~/work/example-project --skill find-skills
+```
+
+What `cron` does:
+
+- scans only recent logged Flow Codex events
+- skips missing/moved repo paths
+- rebuilds scorecards for a bounded number of recent repos
+- never launches Codex or replays network work in the background
+
+Because `cron` only reads local event logs and never launches Codex, learning stays cheap and safe enough to run regularly.
+
+### macOS launchd schedule for skill-eval
+
+If you want scorecards to stay fresh automatically on macOS:
+
+```bash
+f codex-skill-eval-launchd-install
+f codex-skill-eval-launchd-status
+f codex-skill-eval-launchd-logs
+```
+
+Default schedule:
+
+- every 30 minutes
+- scan up to 400 recent events
+- rebuild up to 12 recent repo scorecards
+- ignore repos not seen in the last 168 hours
+
+You can tune install-time bounds:
+
+```bash
+f codex-skill-eval-launchd-install --minutes 20 --limit 600 --max-targets 16 --within-hours 72
+f codex-skill-eval-launchd-install --dry-run
+```
+
+Remove it with:
+
+```bash
+f codex-skill-eval-launchd-uninstall
+```
 
 ### Cursor behavior
 
 
@@ -38,10 +38,12 @@ install = ["linear"]  # optional: ensure skills are installed (local ~/.codex/sk
 # task_skill_allow_implicit_invocation = false
 [codex]               # optional: Codex-first open/resolve behavior
 # auto_resolve_references = true
+# prompt_context_budget_chars = 1200
+# max_resolved_references = 2
 [[codex.reference_resolver]]
 # name = "linear"
 # match = ["https://linear.app/*/issue/*", "https://linear.app/*/project/*"]
-# command = "forge linear inspect {{ref}} --json"
+# command = "my-linear-tool inspect {{ref}} --json"
 # inject_as = "linear"
 [skills.seq]          # optional: seq-backed dependency skill fetching defaults
 # seq_repo = "~/code/seq"
@@ -139,6 +141,9 @@ fr = "f run"
 - `[skills.codex]`: optional Codex tuning; task skill `agents/openai.yaml` generation, post-sync force reload, and implicit invocation policy defaults.
 - `[codex]`: optional Codex-first control-plane settings for `f codex open` / `f codex resolve`.
   - `auto_resolve_references`: when true, matched resolver output is compacted and injected into new-session prompts.
+  - `prompt_context_budget_chars`: hard cap for injected context before the raw user request is appended.
+  - `max_resolved_references`: maximum number of resolved references Flow may inject into one prompt.
+  - `runtime_skills`: when true, `f codex open` may materialize Flow-managed per-launch runtime skills for wrapper transports.
   - `[[codex.reference_resolver]]`: repo-specific reference unrollers with wildcard `match` patterns and a shell `command` template.
   - command templates support `{{ref}}`, `{{query}}`, and `{{cwd}}`.
 - `[skills.seq]`: optional defaults for `f skills fetch ...` (local seq scraper integration).
@@ -175,13 +180,19 @@ task_skill_allow_implicit_invocation = false
 
 [codex]
 auto_resolve_references = true
+prompt_context_budget_chars = 900
+max_resolved_references = 1
+runtime_skills = true
 
 [[codex.reference_resolver]]
 name = "linear"
 match = ["https://linear.app/*/issue/*", "https://linear.app/*/project/*"]
-command = "forge linear inspect {{ref}} --json"
+command = "my-linear-tool inspect {{ref}} --json"
 inject_as = "linear"
 
+[options]
+codex_bin = "~/code/flow/scripts/codex-flow-wrapper"
+
 [commit.testing]
 mode = "block"
 runner = "bun"
 
@@ -382,6 +382,26 @@ name = "ai-taskd-launchd-logs"
 command = "python3 ./scripts/ai-taskd-launchd.py logs $@"
 description = "Show ai-taskd launch agent logs"
 
+[[tasks]]
+name = "codex-skill-eval-launchd-install"
+command = "python3 ./scripts/codex-skill-eval-launchd.py install $@"
+description = "Install scheduled Codex skill-eval scorecard refresh (launchd)"
+
+[[tasks]]
+name = "codex-skill-eval-launchd-uninstall"
+command = "python3 ./scripts/codex-skill-eval-launchd.py uninstall"
+description = "Remove scheduled Codex skill-eval scorecard refresh (launchd)"
+
+[[tasks]]
+name = "codex-skill-eval-launchd-status"
+command = "python3 ./scripts/codex-skill-eval-launchd.py status"
+description = "Show scheduled Codex skill-eval launch agent status"
+
+[[tasks]]
+name = "codex-skill-eval-launchd-logs"
+command = "python3 ./scripts/codex-skill-eval-launchd.py logs $@"
+description = "Show scheduled Codex skill-eval launch agent logs"
+
 [[tasks]]
 name = "test-args"
 command = "echo \"arg1=$1 arg2=$2 all=$@\""
 
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import json
+import os
+import signal
+import subprocess
+import sys
+from pathlib import Path
+
+
+# Name prefix for Flow runtime entries. Not referenced anywhere else in this
+# script. NOTE(review): presumably matches names generated by Flow - confirm.
+RUNTIME_PREFIX = "flow-runtime-"
+
+
+def real_codex_bin() -> str:
+    value = os.environ.get("FLOW_CODEX_REAL_BIN", "").strip()
+    return value or "codex"
+
+
+def agents_skill_root() -> Path:
+    return Path.home() / ".agents" / "skills"
+
+
+def load_runtime_state() -> dict | None:
+    raw_path = os.environ.get("FLOW_CODEX_RUNTIME_STATE", "").strip()
+    if not raw_path:
+        return None
+    path = Path(raw_path).expanduser()
+    if not path.is_file():
+        return None
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def remove_path(path: Path) -> None:
+    try:
+        if path.is_symlink() or path.is_file():
+            path.unlink()
+        elif path.is_dir():
+            for child in path.iterdir():
+                remove_path(child)
+            path.rmdir()
+    except FileNotFoundError:
+        pass
+
+
+def materialize_runtime_skills(state: dict) -> list[Path]:
+    token = str(state.get("token", "")).strip()
+    skills = state.get("skills", [])
+    if not token or not isinstance(skills, list) or not skills:
+        return []
+
+    root = agents_skill_root()
+    root.mkdir(parents=True, exist_ok=True)
+    created: list[Path] = []
+    for skill in skills:
+        if not isinstance(skill, dict):
+            continue
+        name = str(skill.get("name", "")).strip()
+        source = str(skill.get("path", "")).strip()
+        if not name or not source:
+            continue
+        source_path = Path(source).expanduser()
+        if not source_path.is_dir():
+            continue
+        target = root / name
+        if target.exists() or target.is_symlink():
+            remove_path(target)
+        os.symlink(source_path, target, target_is_directory=True)
+        created.append(target)
+    return created
+
+
+def cleanup_runtime_symlinks(paths: list[Path]) -> None:
+    for path in paths:
+        remove_path(path)
+
+
+def main() -> int:
+    state = load_runtime_state()
+    created = materialize_runtime_skills(state) if state else []
+
+    env = dict(os.environ)
+    runtime_state_path = env.get("FLOW_CODEX_RUNTIME_STATE", "").strip()
+    if runtime_state_path:
+        env["FLOW_CODEX_RUNTIME_STATE_PATH"] = runtime_state_path
+    env.pop("FLOW_CODEX_RUNTIME_STATE", None)
+    proc = None
+
+    def forward_signal(signum: int, _frame) -> None:
+        nonlocal proc
+        if proc is not None:
+            proc.send_signal(signum)
+
+    for signum in (signal.SIGINT, signal.SIGTERM):
+        signal.signal(signum, forward_signal)
+
+    try:
+        proc = subprocess.Popen([real_codex_bin(), *sys.argv[1:]], env=env)
+        return proc.wait()
+    finally:
+        cleanup_runtime_symlinks(created)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())