@@ -1075,6 +1075,194 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
10751075 click .echo (f" [OK] { name } removed from knowledge base." )
10761076
10771077
def _refresh_schema(wiki_dir: Path) -> bool:
    """Bring ``wiki/AGENTS.md`` in line with the bundled ``AGENTS_MD`` schema.

    Behaviour depends on what is currently on disk:

    * File present and identical to ``AGENTS_MD``: nothing is written and
      False is returned.
    * File present but different: its text is copied to
      ``wiki/AGENTS.md.bak`` (replacing whatever was there) and the file is
      then overwritten with ``AGENTS_MD``.
    * File missing: it is created from ``AGENTS_MD`` and no backup is made.
      The only exception is an empty ``AGENTS_MD``, which compares equal to
      the "missing" placeholder and therefore results in a no-op.

    Args:
        wiki_dir: The knowledge base's ``wiki`` directory. Its parent is used
            as the base for the relative backup path shown to the user.

    Returns:
        True if ``AGENTS.md`` was written (created or overwritten), False if
        nothing had to change.
    """
    agents_file = wiki_dir / "AGENTS.md"
    # Probe the file once so that the backup decision and the text that gets
    # backed up always describe the same on-disk state.
    had_schema = agents_file.exists()
    current = agents_file.read_text(encoding="utf-8") if had_schema else ""
    if current == AGENTS_MD:
        return False

    if had_schema:
        # Preserve the user's customised schema before clobbering it.
        backup = wiki_dir / "AGENTS.md.bak"
        backup.write_text(current, encoding="utf-8")
        click.echo(f" Backed up existing schema to {backup.relative_to(wiki_dir.parent)}")

    agents_file.write_text(AGENTS_MD, encoding="utf-8")
    click.echo(" Refreshed wiki/AGENTS.md to the current schema.")
    return True
1096+
1097+
def _recompile_doc_kind(meta: dict) -> str:
    """Return ``"long"`` for ``long_pdf`` registry entries, ``"short"`` otherwise."""
    return "long" if meta.get("type") == "long_pdf" else "short"


def _recompile_entry(compiler, meta: dict, name: str, progress: str,
                     kb_dir: Path, wiki_dir: Path, model: str) -> bool:
    """Recompile one registry entry and report the outcome via ``click.echo``.

    Args:
        compiler: The ``openkb.agent.compiler`` module. The compile functions
            are looked up on it at call time so patches applied to the module
            are honoured.
        meta: Registry metadata for the document.
        name: Resolved document name (file stem under ``wiki/``).
        progress: Progress prefix such as ``"[2/7]"``.
        kb_dir: Knowledge base root, used for relative paths in messages and
            passed through to the compiler.
        wiki_dir: The ``wiki`` directory holding ``sources/`` and ``summaries/``.
        model: Model identifier passed through to the compiler.

    Returns:
        True if the compile call completed; False if the entry was skipped
        (missing ``doc_id`` or input file) or compilation raised.
    """
    kind = _recompile_doc_kind(meta)
    is_long = kind == "long"

    if is_long:
        doc_id = meta.get("doc_id")
        if not doc_id:
            click.echo(
                f"{progress} [SKIP] {name}: legacy long-doc entry without a "
                "doc_id — re-add to refresh."
            )
            return False
        # Long docs are recompiled from their existing summary page.
        input_path = wiki_dir / "summaries" / f"{name}.md"
        input_label = "summary"
    else:
        doc_id = None
        # Short docs are recompiled from the converted source page.
        input_path = wiki_dir / "sources" / f"{name}.md"
        input_label = "source"

    if not input_path.exists():
        click.echo(
            f"{progress} [SKIP] {name}: missing {input_label} at "
            f"{input_path.relative_to(kb_dir)}."
        )
        return False

    click.echo(f"{progress} Recompiling {kind} doc {name}...")
    # perf_counter is unaffected by wall-clock adjustments, unlike time.time().
    started = time.perf_counter()
    try:
        if is_long:
            asyncio.run(compiler.compile_long_doc(name, input_path, doc_id, kb_dir, model))
        else:
            asyncio.run(compiler.compile_short_doc(name, input_path, kb_dir, model))
    except Exception as exc:
        # Deliberately broad: one failing document must not abort the batch.
        click.echo(f" [ERROR] Compilation failed: {exc}")
        logging.getLogger(__name__).debug("Recompile traceback:", exc_info=True)
        return False
    click.echo(f" [OK] {name} ({time.perf_counter() - started:.1f}s)")
    return True


@cli.command()
@click.argument("doc_name", required=False)
@click.option("--all", "all_docs", is_flag=True, default=False,
              help="Recompile every indexed document.")
@click.option("--dry-run", is_flag=True, default=False,
              help="List the docs that would be recompiled; no LLM calls, no writes.")
@click.option("--yes", "-y", is_flag=True, default=False,
              help="Skip the --all confirmation prompt.")
@click.option("--refresh-schema", "refresh_schema", is_flag=True, default=False,
              help="Overwrite wiki/AGENTS.md with the bundled schema (backs up "
                   "the old one to AGENTS.md.bak) if it differs.")
@click.pass_context
def recompile(ctx, doc_name, all_docs, dry_run, yes, refresh_schema):
    """Re-run the current compile pipeline on already-indexed documents.

    Recompiling re-runs the same ``compile_short_doc`` / ``compile_long_doc``
    that ``openkb add`` uses, so pre-feature KBs gain the ``entities/`` layer
    and pages refresh to the current format. It does NOT re-run PageIndex or
    re-convert raw files — it reuses the on-disk ``wiki/sources/`` and
    ``wiki/summaries/`` content (and the registry's PageIndex ``doc_id``).

    DOC_NAME recompiles one doc (resolved like ``openkb remove`` — filename,
    slug, or unique substring). ``--all`` recompiles every indexed doc.
    Exactly one of DOC_NAME or ``--all`` is required.

    Side effect: this regenerates summaries (short docs) and rewrites concept
    pages with the current logic — manual edits to those pages are overwritten.
    """
    from openkb.state import HashRegistry

    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
    if kb_dir is None:
        click.echo("No knowledge base found. Run `openkb init` first.")
        return

    # Exactly one selector (DOC_NAME xor --all) must be given.
    if all_docs and doc_name:
        click.echo("Specify either a DOC_NAME or --all, not both.")
        return
    if not all_docs and not doc_name:
        click.echo("Specify a document name or pass --all to recompile every doc.")
        return

    openkb_dir = kb_dir / ".openkb"
    wiki_dir = kb_dir / "wiki"
    registry = HashRegistry(openkb_dir / "hashes.json")

    # Resolve the set of docs to recompile.
    if all_docs:
        targets = list(registry.all_entries().values())
        if not targets:
            click.echo("No documents indexed yet. Run `openkb add` first.")
            return
    else:
        matches = _resolve_doc_identifier(registry, doc_name)
        if not matches:
            click.echo(f"No document matching '{doc_name}' found in the KB.")
            click.echo("Try `openkb list` to see indexed documents.")
            return
        if len(matches) > 1:
            click.echo(f"'{doc_name}' matches multiple documents:")
            for _, m in matches:
                click.echo(f" - {m.get('name', '?')} (doc_name: {m.get('doc_name', '?')})")
            click.echo("Use a more specific name or the exact doc_name slug.")
            return
        targets = [matches[0][1]]

    # --dry-run: enumerate only, no LLM calls, no writes.
    if dry_run:
        click.echo(f"Would recompile {len(targets)} document(s):")
        for meta in targets:
            name = meta.get("doc_name") or meta.get("name", "?")
            click.echo(f" - {name} ({_recompile_doc_kind(meta)})")
        click.echo(
            "\n Note: recompiling regenerates summaries (short docs) and rewrites "
            "concept pages — manual edits would be overwritten."
        )
        click.echo("(dry-run — nothing modified)")
        return

    # --all confirmation (the summary/concept-regeneration side effect).
    if all_docs and not yes:
        click.echo(
            f"This will recompile {len(targets)} document(s), regenerating "
            "summaries and rewriting concept pages with the current logic.\n "
            "Manual edits to those pages will be overwritten."
        )
        if not click.confirm("Proceed?", default=False):
            click.echo("Aborted.")
            return

    # Only touch the schema once the user has committed to a real run.
    if refresh_schema:
        _refresh_schema(wiki_dir)

    _setup_llm_key(kb_dir)
    config = load_config(openkb_dir / "config.yaml")
    model: str = config.get("model", DEFAULT_CONFIG["model"])

    # Import lazily and reference via the module so tests can patch
    # ``openkb.agent.compiler.compile_*`` and see the call.
    from openkb.agent import compiler

    recompiled = 0
    skipped = 0
    total = len(targets)
    for i, meta in enumerate(targets, 1):
        progress = f"[{i}/{total}]"
        # ``or ""`` guards against a registry entry that stores name as null.
        name = meta.get("doc_name") or Path(meta.get("name") or "").stem
        if not name:
            click.echo(f"{progress} [SKIP] registry entry has no doc_name.")
            skipped += 1
            continue

        if _recompile_entry(compiler, meta, name, progress, kb_dir, wiki_dir, model):
            recompiled += 1
        else:
            # Failed compilations are reported in the same bucket as skips.
            skipped += 1

    click.echo(f"\n Done: recompiled {recompiled}, skipped {skipped}.")
    append_log(wiki_dir, "recompile", f"recompiled {recompiled}, skipped {skipped}")
1264+
1265+
10781266@cli .command ()
10791267@click .option (
10801268 "--resume" , "-r" , "resume" ,
0 commit comments