Skip to content

Commit e30e40a

Browse files
committed
feat(cli): rename remove --keep-empty-concepts → --keep-empty (covers entities too)
This PR wired entity pages into 'openkb remove', so the flag now governs both concept AND entity retention — but its name still says 'concepts'. Make --keep-empty the canonical name (making it clear that it covers both), keep --keep-empty-concepts as a backward-compatible alias, and update the preview/summary messages, docstring, and README accordingly.
1 parent 19d0f61 commit e30e40a

3 files changed

Lines changed: 45 additions & 19 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ OpenKB commands fall into two layers: the **wiki foundation** (compile + manage
154154
|---|---|
155155
| `openkb init` | Initialize a new knowledge base (interactive) |
156156
| <code>openkb&nbsp;add&nbsp;&lt;file_or_dir_or_URL&gt;</code> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) |
157-
| <code>openkb&nbsp;remove&nbsp;&lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) |
157+
| <code>openkb&nbsp;remove&nbsp;&lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty` to retain artifacts) |
158158
| <code>openkb&nbsp;recompile&nbsp;[&lt;doc&gt;]&nbsp;[--all]</code> | Re-run the current compile pipeline on already-indexed docs (e.g. to backfill the `entities/` layer) without re-indexing. Regenerates summaries and rewrites concept pages — manual edits are overwritten. Use `--dry-run` to preview, `--refresh-schema` to also update `wiki/AGENTS.md` |
159159
| `openkb watch` | Watch `raw/` and auto-compile new files |
160160
| `openkb lint` | Run structural + knowledge health checks |

openkb/cli.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -804,28 +804,30 @@ def _resolve_doc_identifier(registry, identifier: str) -> list[tuple[str, dict]]
804804
@click.argument("identifier")
805805
@click.option("--keep-raw", is_flag=True, default=False,
806806
help="Don't delete the original file from raw/.")
807-
@click.option("--keep-empty-concepts", is_flag=True, default=False,
808-
help="Keep concept pages whose only source was the removed doc "
809-
"(with empty sources frontmatter). Useful when replacing "
810-
"the doc with a newer version.")
807+
@click.option("--keep-empty", "--keep-empty-concepts", "keep_empty",
808+
is_flag=True, default=False,
809+
help="Keep concept AND entity pages whose only source was the "
810+
"removed doc (leaving an empty sources: [] list). Useful "
811+
"when replacing the doc with a newer version. "
812+
"(--keep-empty-concepts is a backward-compatible alias.)")
811813
@click.option("--dry-run", is_flag=True, default=False,
812814
help="Print what would be done without modifying anything.")
813815
@click.option("--yes", "-y", is_flag=True, default=False,
814816
help="Skip the confirmation prompt.")
815817
@click.pass_context
816-
def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
818+
def remove(ctx, identifier, keep_raw, keep_empty, dry_run, yes):
817819
"""Remove a document from the knowledge base.
818820
819821
IDENTIFIER may be the original filename ("paper.pdf"), the doc_name
820822
slug ("paper-a1b2c3d4e5f6"), or a substring that uniquely matches one.
821823
822824
Deletes the doc's summary and source files, prunes the doc from
823-
concept-page frontmatter and Related Documents sections, drops the
824-
Documents entry from index.md, removes the hash entry, and finally
825-
runs `lint --fix` to clean any dangling wikilinks.
825+
concept- and entity-page frontmatter and Related Documents sections,
826+
drops the Documents entry from index.md, removes the hash entry, and
827+
finally runs `lint --fix` to clean any dangling wikilinks.
826828
827-
Concept pages whose only source was this doc are deleted by default;
828-
use --keep-empty-concepts to retain them.
829+
Concept and entity pages whose only source was this doc are deleted by
830+
default; use --keep-empty to retain them.
829831
"""
830832
from openkb.agent.compiler import (
831833
remove_doc_from_concept_pages,
@@ -894,8 +896,8 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
894896
source_file_marker = f"summaries/{doc_name}.md"
895897
affected_concepts = _scan_affected_pages(wiki_dir / "concepts", source_file_marker)
896898

897-
concept_deletes = [s for s, r in affected_concepts if r == 0 and not keep_empty_concepts]
898-
concept_edits = [s for s, r in affected_concepts if r > 0 or keep_empty_concepts]
899+
concept_deletes = [s for s, r in affected_concepts if r == 0 and not keep_empty]
900+
concept_edits = [s for s, r in affected_concepts if r > 0 or keep_empty]
899901
for slug in concept_deletes:
900902
actions.append(("DELETE", f"wiki/concepts/{slug}.md (only source: this doc)"))
901903
for slug in concept_edits:
@@ -906,8 +908,8 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
906908
# preview/summary truthful about what it will delete vs. edit.
907909
affected_entities = _scan_affected_pages(wiki_dir / "entities", source_file_marker)
908910

909-
entity_deletes = [s for s, r in affected_entities if r == 0 and not keep_empty_concepts]
910-
entity_edits = [s for s, r in affected_entities if r > 0 or keep_empty_concepts]
911+
entity_deletes = [s for s, r in affected_entities if r == 0 and not keep_empty]
912+
entity_edits = [s for s, r in affected_entities if r > 0 or keep_empty]
911913
for slug in entity_deletes:
912914
actions.append(("DELETE", f"wiki/entities/{slug}.md (only source: this doc)"))
913915
for slug in entity_edits:
@@ -953,13 +955,13 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
953955
click.echo(
954956
f" {len(concept_deletes)} concept(s) will be DELETED because this is their only source."
955957
)
956-
click.echo(" Pass --keep-empty-concepts to retain them instead.")
958+
click.echo(" Pass --keep-empty to retain them instead.")
957959
if entity_deletes:
958960
click.echo("")
959961
click.echo(
960962
f" {len(entity_deletes)} entity(s) will be DELETED because this is their only source."
961963
)
962-
click.echo(" Pass --keep-empty-concepts to retain them instead.")
964+
click.echo(" Pass --keep-empty to retain them instead.")
963965
click.echo("")
964966

965967
if dry_run:
@@ -988,11 +990,11 @@ def remove(ctx, identifier, keep_raw, keep_empty_concepts, dry_run, yes):
988990
shutil.rmtree(images_dir, ignore_errors=True)
989991

990992
concept_result = remove_doc_from_concept_pages(
991-
wiki_dir, doc_name, keep_empty=keep_empty_concepts,
993+
wiki_dir, doc_name, keep_empty=keep_empty,
992994
)
993995

994996
entity_result = remove_doc_from_entity_pages(
995-
wiki_dir, doc_name, keep_empty=keep_empty_concepts,
997+
wiki_dir, doc_name, keep_empty=keep_empty,
996998
)
997999

9981000
remove_doc_from_index(wiki_dir, doc_name, concept_result["deleted"],

tests/test_remove.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ def test_cli_remove_keep_raw_preserves_file(kb_dir):
489489

490490

491491
def test_cli_remove_keep_empty_concepts(kb_dir):
492+
"""The --keep-empty-concepts alias is still accepted (backward compat)."""
492493
_seed_two_doc_kb(kb_dir)
493494
result = _invoke(
494495
kb_dir, ["remove", "attention.pdf", "--keep-empty-concepts", "--yes"],
@@ -501,6 +502,29 @@ def test_cli_remove_keep_empty_concepts(kb_dir):
501502
assert "sources: []" in transformer.read_text()
502503

503504

505+
def test_cli_remove_keep_empty_retains_concepts_and_entities(kb_dir):
506+
"""The unified --keep-empty flag retains BOTH concept and entity pages
507+
whose only source was the removed doc (not just concepts)."""
508+
_seed_two_doc_kb(kb_dir)
509+
(kb_dir / "wiki" / "entities").mkdir(parents=True)
510+
(kb_dir / "wiki" / "entities" / "vaswani.md").write_text(
511+
'---\nsources: ["summaries/attention-h_a.md"]\ntype: person\nbrief: V\n---\n# Vaswani\n',
512+
encoding="utf-8",
513+
)
514+
515+
result = _invoke(kb_dir, ["remove", "attention.pdf", "--keep-empty", "--yes"])
516+
517+
assert result.exit_code == 0, result.output
518+
# single-source entity retained (not deleted), with emptied sources
519+
vaswani = kb_dir / "wiki" / "entities" / "vaswani.md"
520+
assert vaswani.exists()
521+
assert "sources: []" in vaswani.read_text()
522+
# single-source concept retained too
523+
transformer = kb_dir / "wiki" / "concepts" / "transformer.md"
524+
assert transformer.exists()
525+
assert "sources: []" in transformer.read_text()
526+
527+
504528
def test_cli_remove_by_doc_name_slug(kb_dir):
505529
_seed_two_doc_kb(kb_dir)
506530
result = _invoke(kb_dir, ["remove", "attention-h_a", "--yes"])

0 commit comments

Comments
 (0)