@@ -739,6 +739,37 @@ def _cleanup_pageindex(
739739 return True , f"deleted PageIndex doc ({ doc_id [:12 ]} …)"
740740
741741
def _scan_affected_pages(pages_dir: Path, source_file_marker: str) -> list[tuple[str, int]]:
    """Find pages in ``pages_dir`` whose frontmatter cites ``source_file_marker``.

    Every ``*.md`` file directly under ``pages_dir`` is inspected in sorted
    path order. A page counts as affected when the first ``sources:`` line
    of its frontmatter, parsed with ``_parse_yaml_list_value``, contains
    ``source_file_marker``.

    The parser is deliberately the same one the executor uses, so
    JSON-quoted values (``sources: ["summaries/x.md"]`` — the form the
    compiler writes) compare equal to the marker. An earlier hand-rolled
    comma split kept the surrounding quotes, so nothing ever matched and
    the remove preview silently reported 0 affected pages.

    Args:
        pages_dir: Directory holding the page files; when it is not a
            directory the result is an empty list.
        source_file_marker: Source entry to look for, compared for exact
            membership in the parsed ``sources`` list.

    Returns:
        A list of ``(slug, remaining_sources)`` tuples, where ``slug`` is
        the file stem and ``remaining_sources`` is the number of parsed
        entries minus one, never below zero.
    """
    from openkb.agent.compiler import _parse_yaml_list_value

    if not pages_dir.is_dir():
        return []

    hits: list[tuple[str, int]] = []
    for page in sorted(pages_dir.glob("*.md")):
        content = page.read_text(encoding="utf-8")

        # Frontmatter must open with '---' at the very start of the file and
        # be closed by a later '---'; pages without both are ignored.
        closing = content.find("---", 3) if content.startswith("---") else -1
        if closing == -1:
            continue

        # Only the first ``sources:`` line is consulted, matched or not.
        sources_line = next(
            (
                candidate
                for candidate in content[:closing].split("\n")
                if candidate.lstrip().startswith("sources:")
            ),
            None,
        )
        if sources_line is None:
            continue

        entries = _parse_yaml_list_value(sources_line)
        if entries is None or source_file_marker not in entries:
            continue

        # What is left once this document's entry is taken out.
        hits.append((page.stem, max(len(entries) - 1, 0)))
    return hits
771+
772+
742773def _resolve_doc_identifier (registry , identifier : str ) -> list [tuple [str , dict ]]:
743774 """Find registry entries matching ``identifier``.
744775
@@ -861,31 +892,7 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
861892 # affect the delete/edit classification, so the plan reflects what
862893 # the executor will actually do.
863894 source_file_marker = f"summaries/{ doc_name } .md"
864- affected_concepts : list [tuple [str , int ]] = [] # (slug, remaining_sources)
865- concepts_dir = wiki_dir / "concepts"
866- if concepts_dir .is_dir ():
867- for path in sorted (concepts_dir .glob ("*.md" )):
868- text = path .read_text (encoding = "utf-8" )
869- if not text .startswith ("---" ):
870- continue
871- fm_end = text .find ("---" , 3 )
872- if fm_end == - 1 :
873- continue
874- sources_count = 0
875- source_in_frontmatter = False
876- for line in text [:fm_end ].split ("\n " ):
877- if line .lstrip ().startswith ("sources:" ):
878- lb = line .find ("[" )
879- rb = line .rfind ("]" )
880- if lb != - 1 and rb != - 1 and rb > lb :
881- items = [s .strip () for s in line [lb + 1 :rb ].split ("," ) if s .strip ()]
882- sources_count = len (items )
883- source_in_frontmatter = source_file_marker in items
884- break
885- if not source_in_frontmatter :
886- continue
887- remaining = max (sources_count - 1 , 0 )
888- affected_concepts .append ((path .stem , remaining ))
895+ affected_concepts = _scan_affected_pages (wiki_dir / "concepts" , source_file_marker )
889896
890897 concept_deletes = [s for s , r in affected_concepts if r == 0 and not keep_empty_concepts ]
891898 concept_edits = [s for s , r in affected_concepts if r > 0 or keep_empty_concepts ]
@@ -897,31 +904,7 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
897904 # Scan entity pages with the same frontmatter logic as concepts. The
898905 # executor calls ``remove_doc_from_entity_pages``; this only makes the
899906 # preview/summary truthful about what it will delete vs. edit.
900- affected_entities : list [tuple [str , int ]] = [] # (slug, remaining_sources)
901- entities_dir = wiki_dir / "entities"
902- if entities_dir .is_dir ():
903- for path in sorted (entities_dir .glob ("*.md" )):
904- text = path .read_text (encoding = "utf-8" )
905- if not text .startswith ("---" ):
906- continue
907- fm_end = text .find ("---" , 3 )
908- if fm_end == - 1 :
909- continue
910- sources_count = 0
911- source_in_frontmatter = False
912- for line in text [:fm_end ].split ("\n " ):
913- if line .lstrip ().startswith ("sources:" ):
914- lb = line .find ("[" )
915- rb = line .rfind ("]" )
916- if lb != - 1 and rb != - 1 and rb > lb :
917- items = [s .strip () for s in line [lb + 1 :rb ].split ("," ) if s .strip ()]
918- sources_count = len (items )
919- source_in_frontmatter = source_file_marker in items
920- break
921- if not source_in_frontmatter :
922- continue
923- remaining = max (sources_count - 1 , 0 )
924- affected_entities .append ((path .stem , remaining ))
907+ affected_entities = _scan_affected_pages (wiki_dir / "entities" , source_file_marker )
925908
926909 entity_deletes = [s for s , r in affected_entities if r == 0 and not keep_empty_concepts ]
927910 entity_edits = [s for s , r in affected_entities if r > 0 or keep_empty_concepts ]
0 commit comments