Skip to content

Commit d10055d

Browse files
committed
test(cli): recompile dispatch/dry-run/skip/refresh-schema
1 parent 58f8edc commit d10055d

1 file changed

Lines changed: 296 additions & 0 deletions

File tree

tests/test_recompile.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
"""Tests for the `openkb recompile` CLI command.
2+
3+
`recompile` re-runs the current compile pipeline (compile_short_doc /
4+
compile_long_doc) on already-indexed docs so pre-feature KBs gain the
5+
entities/ layer and refresh to the current format. It does NOT re-run
6+
PageIndex or re-convert raw files.
7+
8+
Covers:
9+
- short-doc dispatch (compile_short_doc called with the right args)
10+
- long-doc dispatch (compile_long_doc called with doc_id; PageIndex not invoked)
11+
- --all confirmation + --yes bypass
12+
- --dry-run: no compile calls, no writes
13+
- skip+warn paths (missing source, missing summary/doc_id) with others
14+
still processed
15+
- unknown <doc_name> / empty registry friendly error
16+
- --refresh-schema backs up + overwrites only when AGENTS.md differs
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import json
22+
from pathlib import Path
23+
from unittest.mock import AsyncMock, patch
24+
25+
from click.testing import CliRunner
26+
27+
from openkb.cli import cli
28+
from openkb.schema import AGENTS_MD
29+
30+
31+
# ---------------------------------------------------------------------------
32+
# Helpers
33+
# ---------------------------------------------------------------------------
34+
35+
36+
def _invoke(kb_dir, args, input_text=None):
    """Run the CLI against *kb_dir* and return the click result object.

    ``--kb-dir <kb_dir>`` is placed ahead of *args*; *input_text*, when
    given, is passed to the runner as the command's input.
    """
    argv = ["--kb-dir", str(kb_dir)]
    argv.extend(args)
    runner = CliRunner()
    return runner.invoke(cli, argv, input=input_text)
40+
41+
42+
def _seed_short(kb_dir: Path) -> None:
    """Seed *kb_dir* with one short doc whose source file exists on disk.

    Writes the registry (``.openkb/hashes.json``), the source page
    (``wiki/sources/notes-h_s.md``) and ``wiki/log.md``. The ``.openkb``
    and ``wiki/sources`` directories must already exist, because
    ``write_text`` does not create parent directories.
    """
    registry = {
        "h_s": {"name": "notes.md", "doc_name": "notes-h_s", "type": "md"},
    }
    # Pin UTF-8 like the other fixture files so the registry never depends
    # on the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "sources" / "notes-h_s.md").write_text(
        "# Notes\n\nbody\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")
51+
52+
53+
def _seed_long(kb_dir: Path) -> None:
    """Seed *kb_dir* with one long (PageIndex) doc: summary file + doc_id.

    Writes the registry (``.openkb/hashes.json``), the summary page
    (``wiki/summaries/paper-h_l.md``) and ``wiki/log.md``. The ``.openkb``
    and ``wiki/summaries`` directories must already exist, because
    ``write_text`` does not create parent directories.
    """
    registry = {
        "h_l": {
            "name": "paper.pdf", "doc_name": "paper-h_l",
            "type": "long_pdf", "doc_id": "doc-abc123",
        },
    }
    # Pin UTF-8 like the other fixture files so the registry never depends
    # on the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "summaries" / "paper-h_l.md").write_text(
        "---\nsources: [raw/paper.pdf]\nbrief: P\n---\n# Paper\n",
        encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")
66+
67+
68+
# ---------------------------------------------------------------------------
69+
# short-doc dispatch
70+
# ---------------------------------------------------------------------------
71+
72+
73+
def test_recompile_short_dispatches_compile_short_doc(kb_dir):
    """A short doc goes to compile_short_doc as (doc_name, source_path, kb_dir)."""
    _seed_short(kb_dir)
    patch_short = patch(
        "openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock,
    )
    patch_long = patch(
        "openkb.agent.compiler.compile_long_doc", new_callable=AsyncMock,
    )
    with patch_short as short, patch_long as long_:
        result = _invoke(kb_dir, ["recompile", "notes.md"])

    assert result.exit_code == 0, result.output
    short.assert_called_once()
    positional = short.call_args.args
    expected_source = kb_dir / "wiki" / "sources" / "notes-h_s.md"
    assert positional[0] == "notes-h_s"  # doc_name
    assert positional[1] == expected_source  # source_path
    assert positional[2] == kb_dir  # kb_dir
    # The long-doc pipeline must stay untouched for a short doc.
    long_.assert_not_called()
    assert "recompiled 1" in result.output
87+
88+
89+
# ---------------------------------------------------------------------------
90+
# long-doc dispatch
91+
# ---------------------------------------------------------------------------
92+
93+
94+
def test_recompile_long_dispatches_compile_long_doc_with_doc_id(kb_dir):
    """A long doc goes to compile_long_doc with its doc_id; no re-indexing."""
    _seed_long(kb_dir)
    patch_long = patch(
        "openkb.agent.compiler.compile_long_doc", new_callable=AsyncMock,
    )
    patch_short = patch(
        "openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock,
    )
    patch_index = patch("openkb.indexer.index_long_document")
    with patch_long as long_, patch_short as short, patch_index as index:
        result = _invoke(kb_dir, ["recompile", "paper.pdf"])

    assert result.exit_code == 0, result.output
    long_.assert_called_once()
    positional = long_.call_args.args
    expected_summary = kb_dir / "wiki" / "summaries" / "paper-h_l.md"
    assert positional[0] == "paper-h_l"  # doc_name
    assert positional[1] == expected_summary
    assert positional[2] == "doc-abc123"  # doc_id
    assert positional[3] == kb_dir
    short.assert_not_called()
    # PageIndex must NOT be re-run
    index.assert_not_called()
    assert "recompiled 1" in result.output
112+
113+
114+
# ---------------------------------------------------------------------------
115+
# --all confirmation + --yes
116+
# ---------------------------------------------------------------------------
117+
118+
119+
def test_recompile_all_requires_confirmation(kb_dir):
    """Answering "n" to the ``--all`` prompt aborts without compiling."""
    _seed_short(kb_dir)
    target = "openkb.agent.compiler.compile_short_doc"
    with patch(target) as short:
        result = _invoke(kb_dir, ["recompile", "--all"], input_text="n\n")

    assert result.exit_code == 0, result.output
    assert "Aborted" in result.output
    short.assert_not_called()
127+
128+
129+
def test_recompile_all_yes_bypasses_confirmation(kb_dir):
    """``--all --yes`` compiles without any confirmation input being supplied."""
    _seed_short(kb_dir)
    target = "openkb.agent.compiler.compile_short_doc"
    with patch(target, new_callable=AsyncMock) as short:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    short.assert_called_once()
    assert "recompiled 1" in result.output
137+
138+
139+
# ---------------------------------------------------------------------------
140+
# --dry-run
141+
# ---------------------------------------------------------------------------
142+
143+
144+
def test_recompile_dry_run_no_calls_no_writes(kb_dir):
    """``--dry-run`` lists the doc but neither compiles nor touches log.md."""
    _seed_short(kb_dir)
    log_path = kb_dir / "wiki" / "log.md"
    log_before = log_path.read_text()
    patch_short = patch("openkb.agent.compiler.compile_short_doc")
    patch_long = patch("openkb.agent.compiler.compile_long_doc")
    with patch_short as short, patch_long as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--dry-run"])

    assert result.exit_code == 0, result.output
    short.assert_not_called()
    long_.assert_not_called()
    output = result.output
    assert "notes-h_s" in output
    assert "short" in output
    # No writes: log.md unchanged
    assert log_path.read_text() == log_before
158+
159+
160+
# ---------------------------------------------------------------------------
161+
# skip + warn paths
162+
# ---------------------------------------------------------------------------
163+
164+
165+
def test_recompile_skips_short_missing_source(kb_dir):
    """Short doc with no source on disk is skipped; the other doc still runs."""
    registry = {
        "h_ok": {"name": "ok.md", "doc_name": "ok-h_ok", "type": "md"},
        "h_miss": {"name": "gone.md", "doc_name": "gone-h_miss", "type": "md"},
    }
    # All fixture files pin UTF-8 so the test does not depend on the
    # platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    # Only ok-h_ok gets a source file; gone-h_miss is deliberately left out.
    (kb_dir / "wiki" / "sources" / "ok-h_ok.md").write_text(
        "# ok\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock) as short:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    # only the doc with a present source compiled
    assert short.call_count == 1
    assert short.call_args.args[0] == "ok-h_ok"
    assert "recompiled 1" in result.output
    assert "skipped 1" in result.output
183+
184+
185+
def test_recompile_skips_long_missing_doc_id(kb_dir):
    """Long doc lacking doc_id is skipped, so nothing is compiled."""
    registry = {
        # No "doc_id" key: this is the legacy entry under test.
        "h_l": {"name": "legacy.pdf", "doc_name": "legacy-h_l", "type": "long_pdf"},
    }
    # All fixture files pin UTF-8 so the test does not depend on the
    # platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "summaries" / "legacy-h_l.md").write_text(
        "# legacy\n", encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_long_doc") as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    long_.assert_not_called()
    assert "skipped 1" in result.output
    assert "recompiled 0" in result.output
200+
201+
202+
def test_recompile_skips_long_missing_summary(kb_dir):
    """Long doc with doc_id but no summary on disk is skipped."""
    registry = {
        "h_l": {
            "name": "paper.pdf", "doc_name": "paper-h_l",
            "type": "long_pdf", "doc_id": "doc-x",
        },
    }
    # Pin UTF-8 like log.md below so the registry does not depend on the
    # platform's locale encoding. No summary file is written on purpose.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps(registry), encoding="utf-8",
    )
    (kb_dir / "wiki" / "log.md").write_text("# Log\n\n", encoding="utf-8")

    with patch("openkb.agent.compiler.compile_long_doc") as long_:
        result = _invoke(kb_dir, ["recompile", "--all", "--yes"])

    assert result.exit_code == 0, result.output
    long_.assert_not_called()
    assert "skipped 1" in result.output
218+
219+
220+
# ---------------------------------------------------------------------------
221+
# error paths
222+
# ---------------------------------------------------------------------------
223+
224+
225+
def test_recompile_requires_doc_or_all(kb_dir):
    """Bare ``recompile`` must exit non-zero or print guidance, never crash."""
    _seed_short(kb_dir)
    result = _invoke(kb_dir, ["recompile"])
    # CliRunner turns an unhandled exception into exit_code 1, which the
    # loose check below would accept. Only a clean return or a deliberate
    # exit (SystemExit) counts as a friendly rejection.
    assert result.exception is None or isinstance(result.exception, SystemExit), (
        repr(result.exception)
    )
    assert result.exit_code != 0 or "Specify" in result.output or "--all" in result.output
229+
230+
231+
def test_recompile_doc_and_all_conflict(kb_dir):
    """A doc name combined with ``--all`` is reported as a conflict, not a crash."""
    _seed_short(kb_dir)
    result = _invoke(kb_dir, ["recompile", "notes.md", "--all"])
    # CliRunner turns an unhandled exception into exit_code 1, which the
    # loose check below would accept. Only a clean return or a deliberate
    # exit (SystemExit) counts as a friendly rejection.
    assert result.exception is None or isinstance(result.exception, SystemExit), (
        repr(result.exception)
    )
    output = result.output.lower()
    assert "both" in output or "either" in output or result.exit_code != 0
236+
237+
238+
def test_recompile_unknown_doc_friendly_error(kb_dir):
    """An unregistered doc name is echoed back, exits 0 and compiles nothing."""
    _seed_short(kb_dir)
    target = "openkb.agent.compiler.compile_short_doc"
    with patch(target) as short:
        result = _invoke(kb_dir, ["recompile", "no-such-doc"])
    assert result.exit_code == 0, result.output
    assert "no-such-doc" in result.output
    short.assert_not_called()
245+
246+
247+
def test_recompile_empty_registry_friendly_error(kb_dir):
    """``--all`` on an empty registry reports no documents and compiles nothing."""
    # Pin UTF-8, as the other fixture writes in this module do, so the
    # registry does not depend on the platform's locale encoding.
    (kb_dir / ".openkb" / "hashes.json").write_text(
        json.dumps({}), encoding="utf-8",
    )
    with patch("openkb.agent.compiler.compile_short_doc") as short:
        result = _invoke(kb_dir, ["recompile", "--all"], input_text="y\n")
    assert result.exit_code == 0, result.output
    short.assert_not_called()
    assert "No documents" in result.output or "no documents" in result.output
254+
255+
256+
# ---------------------------------------------------------------------------
257+
# --refresh-schema
258+
# ---------------------------------------------------------------------------
259+
260+
261+
def test_recompile_refresh_schema_overwrites_when_differing(kb_dir):
    """``--refresh-schema`` backs up a differing AGENTS.md, then rewrites it."""
    _seed_short(kb_dir)
    wiki_dir = kb_dir / "wiki"
    agents = wiki_dir / "AGENTS.md"
    old_schema = "OLD CUSTOM SCHEMA\n"
    agents.write_text(old_schema, encoding="utf-8")
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md", "--refresh-schema"])

    assert result.exit_code == 0, result.output
    bak = wiki_dir / "AGENTS.md.bak"
    assert bak.exists()
    # The backup holds the old content; the live file holds the current schema.
    assert bak.read_text(encoding="utf-8") == old_schema
    assert agents.read_text(encoding="utf-8") == AGENTS_MD
273+
274+
275+
def test_recompile_refresh_schema_noop_when_identical(kb_dir):
    """``--refresh-schema`` writes no backup when AGENTS.md is already current."""
    _seed_short(kb_dir)
    wiki_dir = kb_dir / "wiki"
    (wiki_dir / "AGENTS.md").write_text(AGENTS_MD, encoding="utf-8")
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md", "--refresh-schema"])

    assert result.exit_code == 0, result.output
    backup = wiki_dir / "AGENTS.md.bak"
    assert not backup.exists()
284+
285+
286+
def test_recompile_no_refresh_schema_by_default(kb_dir):
    """Without ``--refresh-schema`` a custom AGENTS.md is left alone, no backup."""
    _seed_short(kb_dir)
    wiki_dir = kb_dir / "wiki"
    agents = wiki_dir / "AGENTS.md"
    custom_schema = "OLD CUSTOM SCHEMA\n"
    agents.write_text(custom_schema, encoding="utf-8")
    with patch("openkb.agent.compiler.compile_short_doc", new_callable=AsyncMock):
        result = _invoke(kb_dir, ["recompile", "notes.md"])

    assert result.exit_code == 0, result.output
    # Untouched without the flag
    assert agents.read_text(encoding="utf-8") == custom_schema
    assert not (wiki_dir / "AGENTS.md.bak").exists()

0 commit comments

Comments
 (0)