|
| 1 | +"""Tests for the `openkb recompile` CLI command. |
| 2 | +
|
| 3 | +`recompile` re-runs the current compile pipeline (compile_short_doc / |
| 4 | +compile_long_doc) on already-indexed docs so pre-feature KBs gain the |
| 5 | +entities/ layer and refresh to the current format. It does NOT re-run |
| 6 | +PageIndex or re-convert raw files. |
| 7 | +
|
| 8 | +Covers: |
| 9 | +- short-doc dispatch (compile_short_doc called with the right args) |
| 10 | +- long-doc dispatch (compile_long_doc called with doc_id; PageIndex not invoked) |
| 11 | +- --all confirmation + --yes bypass |
| 12 | +- --dry-run: no compile calls, no writes |
| 13 | +- skip+warn paths (missing source, missing summary/doc_id) with others |
| 14 | + still processed |
| 15 | +- unknown <doc_name> / empty registry friendly error |
| 16 | +- --refresh-schema backs up + overwrites only when AGENTS.md differs |
| 17 | +""" |
| 18 | + |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +import json |
| 22 | +from pathlib import Path |
| 23 | +from unittest.mock import AsyncMock, patch |
| 24 | + |
| 25 | +from click.testing import CliRunner |
| 26 | + |
| 27 | +from openkb.cli import cli |
| 28 | +from openkb.schema import AGENTS_MD |
| 29 | + |
| 30 | + |
| 31 | +# --------------------------------------------------------------------------- |
| 32 | +# Helpers |
| 33 | +# --------------------------------------------------------------------------- |
| 34 | + |
| 35 | + |
def _invoke(kb_dir, args, input_text=None):
    """Run the CLI in-process against *kb_dir* and return the runner result.

    ``--kb-dir <kb_dir>`` is placed in front of *args*; *input_text* is
    forwarded to the runner as the command's ``input``.
    """
    runner = CliRunner()
    argv = ["--kb-dir", str(kb_dir)]
    argv.extend(args)
    return runner.invoke(cli, argv, input=input_text)
| 40 | + |
| 41 | + |
def _seed_short(kb_dir: Path) -> None:
    """Seed *kb_dir* with one short doc that has a source file on disk.

    Writes a single-entry ``.openkb/hashes.json``, the matching
    ``wiki/sources/notes-h_s.md`` file and a fresh ``wiki/log.md``.
    This helper does not create directories; they must already exist.
    """
    registry = {
        "h_s": {"name": "notes.md", "doc_name": "notes-h_s", "type": "md"},
    }
    # Explicit encoding, like the other writes below, so the fixture never
    # depends on the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "sources" / "notes-h_s.md").write_text(
        "# Notes\n\nbody\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")
| 51 | + |
| 52 | + |
def _seed_long(kb_dir: Path) -> None:
    """Seed *kb_dir* with one long (PageIndex) doc: summary file + ``doc_id``.

    Writes a single-entry ``.openkb/hashes.json`` whose entry carries
    ``type: long_pdf`` and a ``doc_id``, the matching
    ``wiki/summaries/paper-h_l.md`` file and a fresh ``wiki/log.md``.
    This helper does not create directories; they must already exist.
    """
    registry = {
        "h_l": {
            "name": "paper.pdf", "doc_name": "paper-h_l",
            "type": "long_pdf", "doc_id": "doc-abc123",
        },
    }
    # Explicit encoding, like the other writes below, so the fixture never
    # depends on the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "summaries" / "paper-h_l.md").write_text(
        "---\nsources: [raw/paper.pdf]\nbrief: P\n---\n# Paper\n",
        encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")
| 66 | + |
| 67 | + |
| 68 | +# --------------------------------------------------------------------------- |
| 69 | +# short-doc dispatch |
| 70 | +# --------------------------------------------------------------------------- |
| 71 | + |
| 72 | + |
def test_recompile_short_dispatches_compile_short_doc(kb_dir):
    """A short doc is handed to ``compile_short_doc``; the long path stays idle."""
    _seed_short(kb_dir)
    short_patcher = patch(
        "openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock,
    )
    long_patcher = patch(
        "openkb.agent.compiler.compile_long_doc", new_callable=AsyncMock,
    )
    with short_patcher as short, long_patcher as long_:
        result = _invoke(kb_dir, ["recompile", "notes.md"])

    assert result.exit_code == 0, result.output
    short.assert_called_once()
    positional = short.call_args.args
    expected_source = kb_dir / "wiki" / "sources" / "notes-h_s.md"
    assert positional[0] == "notes-h_s"  # doc_name
    assert positional[1] == expected_source  # source_path
    assert positional[2] == kb_dir  # kb_dir
    long_.assert_not_called()
    assert "recompiled 1" in result.output
| 87 | + |
| 88 | + |
| 89 | +# --------------------------------------------------------------------------- |
| 90 | +# long-doc dispatch |
| 91 | +# --------------------------------------------------------------------------- |
| 92 | + |
| 93 | + |
def test_recompile_long_dispatches_compile_long_doc_with_doc_id(kb_dir):
    """A long doc is handed to ``compile_long_doc`` together with its ``doc_id``.

    Also checks that neither the short-doc compiler nor
    ``index_long_document`` is called.
    """
    _seed_long(kb_dir)
    long_patcher = patch(
        "openkb.agent.compiler.compile_long_doc", new_callable=AsyncMock,
    )
    short_patcher = patch(
        "openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock,
    )
    index_patcher = patch("openkb.indexer.index_long_document")
    with long_patcher as long_, short_patcher as short, index_patcher as index:
        result = _invoke(kb_dir, ["recompile", "paper.pdf"])

    assert result.exit_code == 0, result.output
    long_.assert_called_once()
    positional = long_.call_args.args
    expected_summary = kb_dir / "wiki" / "summaries" / "paper-h_l.md"
    assert positional[0] == "paper-h_l"  # doc_name
    assert positional[1] == expected_summary
    assert positional[2] == "doc-abc123"  # doc_id
    assert positional[3] == kb_dir
    short.assert_not_called()
    # PageIndex must NOT be re-run
    index.assert_not_called()
    assert "recompiled 1" in result.output
| 112 | + |
| 113 | + |
| 114 | +# --------------------------------------------------------------------------- |
| 115 | +# --all confirmation + --yes |
| 116 | +# --------------------------------------------------------------------------- |
| 117 | + |
| 118 | + |
def test_recompile_all_requires_confirmation(kb_dir):
    """Answering ``n`` to the ``--all`` prompt aborts without compiling anything."""
    _seed_short(kb_dir)
    compiler_patcher = patch("openkb.agent.compiler.compile_short_doc")
    with compiler_patcher as short:
        result = _invoke(kb_dir, ["recompile", "--all"], input_text="n\n")

    output = result.output
    assert result.exit_code == 0, output
    assert "Aborted" in output
    short.assert_not_called()
| 127 | + |
| 128 | + |
def test_recompile_all_yes_bypasses_confirmation(kb_dir):
    """``--all --yes`` compiles the seeded doc without any prompt input."""
    _seed_short(kb_dir)
    compiler_patcher = patch(
        "openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock,
    )
    with compiler_patcher as short:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    output = result.output
    assert result.exit_code == 0, output
    short.assert_called_once()
    assert "recompiled 1" in output
| 137 | + |
| 138 | + |
| 139 | +# --------------------------------------------------------------------------- |
| 140 | +# --dry-run |
| 141 | +# --------------------------------------------------------------------------- |
| 142 | + |
| 143 | + |
def test_recompile_dry_run_no_calls_no_writes(kb_dir):
    """``--dry-run`` lists the doc but compiles nothing and leaves ``log.md`` alone."""
    _seed_short(kb_dir)
    log_path = kb_dir / "wiki" / "log.md"
    # Read with the same encoding the fixture wrote, not the locale default.
    log_before = log_path.read_text(encoding="utf-8")
    with patch("openkb.agent.compiler.compile_short_doc") as short, \
         patch("openkb.agent.compiler.compile_long_doc") as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--dry-run"])

    assert result.exit_code == 0, result.output
    short.assert_not_called()
    long_.assert_not_called()
    assert "notes-h_s" in result.output
    assert "short" in result.output
    # No writes: log.md unchanged
    assert log_path.read_text(encoding="utf-8") == log_before
| 158 | + |
| 159 | + |
| 160 | +# --------------------------------------------------------------------------- |
| 161 | +# skip + warn paths |
| 162 | +# --------------------------------------------------------------------------- |
| 163 | + |
| 164 | + |
def test_recompile_skips_short_missing_source(kb_dir):
    """Short doc with no source on disk is skipped; the other doc still compiles.

    Two short docs are registered, but only ``ok-h_ok.md`` is written under
    ``wiki/sources``.  The run must compile that one and report one skip.
    """
    registry = {
        "h_ok": {"name": "ok.md", "doc_name": "ok-h_ok", "type": "md"},
        "h_miss": {"name": "gone.md", "doc_name": "gone-h_miss", "type": "md"},
    }
    # Explicit encoding on every fixture write so the test does not depend on
    # the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "sources" / "ok-h_ok.md").write_text(
        "# ok\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock) as short:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    # only the doc with a present source compiled
    assert short.call_count == 1
    assert short.call_args.args[0] == "ok-h_ok"
    assert "recompiled 1" in result.output
    assert "skipped 1" in result.output
| 183 | + |
| 184 | + |
def test_recompile_skips_long_missing_doc_id(kb_dir):
    """Long doc whose registry entry has no ``doc_id`` is skipped, not compiled."""
    registry = {
        "h_l": {"name": "legacy.pdf", "doc_name": "legacy-h_l", "type": "long_pdf"},
    }
    # Explicit encoding on every fixture write so the test does not depend on
    # the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "summaries" / "legacy-h_l.md").write_text(
        "# legacy\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_long_doc") as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    long_.assert_not_called()
    assert "skipped 1" in result.output
    assert "recompiled 0" in result.output
| 200 | + |
| 201 | + |
def test_recompile_skips_long_missing_summary(kb_dir):
    """Long doc with a ``doc_id`` but no summary file on disk is skipped."""
    registry = {
        "h_l": {
            "name": "paper.pdf", "doc_name": "paper-h_l",
            "type": "long_pdf", "doc_id": "doc-x",
        },
    }
    # Explicit encoding so the fixture does not depend on the platform's
    # locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_long_doc") as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    long_.assert_not_called()
    assert "skipped 1" in result.output
| 218 | + |
| 219 | + |
| 220 | +# --------------------------------------------------------------------------- |
| 221 | +# error paths |
| 222 | +# --------------------------------------------------------------------------- |
| 223 | + |
| 224 | + |
def test_recompile_requires_doc_or_all(kb_dir):
    """``recompile`` with neither ``<doc_name>`` nor ``--all`` compiles nothing.

    The message/exit-code check is deliberately permissive: a non-zero exit
    or output mentioning "Specify" or "--all" is accepted.
    """
    _seed_short(kb_dir)
    # Patch the compiler so a CLI that wrongly proceeded could never reach the
    # real compile pipeline, and so the "nothing compiled" claim is checked.
    with patch("openkb.agent.compiler.compile_short_doc") as short:
        result = _invoke(kb_dir, ["recompile"])

    assert result.exit_code != 0 or "Specify" in result.output or "--all" in result.output
    short.assert_not_called()
| 229 | + |
| 230 | + |
def test_recompile_doc_and_all_conflict(kb_dir):
    """Passing both ``<doc_name>`` and ``--all`` is rejected and compiles nothing.

    The message/exit-code check is deliberately permissive: output mentioning
    "both" or "either" (case-insensitive) or a non-zero exit is accepted.
    """
    _seed_short(kb_dir)
    # Without this patch, a CLI that failed to reject the combination could hit
    # the real compile pipeline and still pass through a non-zero exit code.
    with patch("openkb.agent.compiler.compile_short_doc") as short:
        result = _invoke(kb_dir, ["recompile", "notes.md", "--all"])

    lowered = result.output.lower()
    assert "both" in lowered or "either" in lowered or result.exit_code != 0
    short.assert_not_called()
| 236 | + |
| 237 | + |
def test_recompile_unknown_doc_friendly_error(kb_dir):
    """An unregistered doc name exits 0, is echoed in the output, compiles nothing."""
    _seed_short(kb_dir)
    compiler_patcher = patch("openkb.agent.compiler.compile_short_doc")
    with compiler_patcher as short:
        result = _invoke(kb_dir, ["recompile", "no-such-doc"])

    output = result.output
    assert result.exit_code == 0, output
    assert "no-such-doc" in output
    short.assert_not_called()
| 245 | + |
| 246 | + |
def test_recompile_empty_registry_friendly_error(kb_dir):
    """With an empty ``hashes.json``, ``--all`` exits 0 and reports no documents."""
    # Explicit encoding so the fixture does not depend on the platform's
    # locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps({}), encoding="utf-8",
    )
    with patch("openkb.agent.compiler.compile_short_doc") as short:
        result = _invoke(kb_dir, ["recompile", "--all"], input_text="y\n")

    assert result.exit_code == 0, result.output
    short.assert_not_called()
    assert "No documents" in result.output or "no documents" in result.output
| 254 | + |
| 255 | + |
| 256 | +# --------------------------------------------------------------------------- |
| 257 | +# --refresh-schema |
| 258 | +# --------------------------------------------------------------------------- |
| 259 | + |
| 260 | + |
def test_recompile_refresh_schema_overwrites_when_differing(kb_dir):
    """``--refresh-schema`` backs up a differing ``AGENTS.md`` and rewrites it.

    The old content must end up in ``wiki/AGENTS.md.bak`` and ``AGENTS.md``
    must equal ``AGENTS_MD``.
    """
    _seed_short(kb_dir)
    agents = kb_dir / "wiki" / "AGENTS.md"
    agents.write_text("OLD CUSTOM SCHEMA\n", encoding="utf-8")
    # The mock only keeps the real compiler from running; it is not inspected,
    # so it is not bound to a name.
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md", "--refresh-schema"])

    assert result.exit_code == 0, result.output
    bak = kb_dir / "wiki" / "AGENTS.md.bak"
    assert bak.exists()
    assert bak.read_text(encoding="utf-8") == "OLD CUSTOM SCHEMA\n"
    assert agents.read_text(encoding="utf-8") == AGENTS_MD
| 273 | + |
| 274 | + |
def test_recompile_refresh_schema_noop_when_identical(kb_dir):
    """``--refresh-schema`` writes no backup when ``AGENTS.md`` equals ``AGENTS_MD``."""
    _seed_short(kb_dir)
    agents = kb_dir / "wiki" / "AGENTS.md"
    agents.write_text(AGENTS_MD, encoding="utf-8")
    # The mock only keeps the real compiler from running; it is not inspected,
    # so it is not bound to a name.
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md", "--refresh-schema"])

    assert result.exit_code == 0, result.output
    assert not (kb_dir / "wiki" / "AGENTS.md.bak").exists()
| 284 | + |
| 285 | + |
def test_recompile_no_refresh_schema_by_default(kb_dir):
    """Without ``--refresh-schema`` a custom ``AGENTS.md`` is left untouched."""
    _seed_short(kb_dir)
    agents = kb_dir / "wiki" / "AGENTS.md"
    agents.write_text("OLD CUSTOM SCHEMA\n", encoding="utf-8")
    # The mock only keeps the real compiler from running; it is not inspected,
    # so it is not bound to a name.
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md"])

    assert result.exit_code == 0, result.output
    # Untouched without the flag
    assert agents.read_text(encoding="utf-8") == "OLD CUSTOM SCHEMA\n"
    assert not (kb_dir / "wiki" / "AGENTS.md.bak").exists()
0 commit comments