7878_ENTITY_TYPES_STR = ", " .join (_ENTITY_TYPE_LIST )
7979
8080
def _resolve_entity_types(config: dict) -> list[str]:
    """Resolve the effective entity-type list from config.

    ``config["entity_types"]`` is honored only when it is a list. Each item
    must be a string; it is normalized with ``strip().lower()``. Entries are
    discarded when they are:

    * not strings (e.g. a YAML null or a nested mapping) — coercing them with
      ``str()`` would invent types such as ``"none"``;
    * empty after stripping (dropped silently);
    * carrying ``{`` or ``}`` — the resolved types are substituted into
      prompt templates that are subsequently passed through ``str.format``,
      where a stray brace would raise or corrupt the prompt.

    Surviving values are de-duplicated in first-seen order and ``"other"`` is
    appended when missing, because it is the coercion fallback for unknown
    types. When the key is absent, not a list, empty, or nothing survives,
    a fresh copy of the default ``_ENTITY_TYPE_LIST`` is returned.

    Args:
        config: Parsed configuration mapping.

    Returns:
        A new list of lower-cased entity-type names (never empty when the
        default list is non-empty); callers may mutate it freely.

    A warning is logged whenever ``entity_types`` is present but not a list,
    contains rejected entries, or yields no usable values.
    """
    raw = config.get("entity_types")
    if raw is None:
        return list(_ENTITY_TYPE_LIST)
    if not isinstance(raw, list):
        logger.warning(
            "config: 'entity_types' must be a list of strings, got %s — "
            "falling back to the default entity types.",
            type(raw).__name__,
        )
        return list(_ENTITY_TYPE_LIST)

    cleaned: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check; ``cleaned`` keeps order
    rejected: list[str] = []
    for item in raw:
        if not isinstance(item, str):
            rejected.append(repr(item))
            continue
        value = item.strip().lower()
        if not value:
            continue
        if "{" in value or "}" in value:
            # Would break the later ``str.format`` call on the prompt.
            rejected.append(repr(item))
            continue
        if value not in seen:
            seen.add(value)
            cleaned.append(value)

    if rejected:
        logger.warning(
            "config: ignoring invalid 'entity_types' entries (each must be a "
            "string without '{' or '}'): %s",
            ", ".join(rejected),
        )
    if not cleaned:
        logger.warning(
            "config: 'entity_types' was present but yielded no usable values — "
            "falling back to the default entity types.",
        )
        return list(_ENTITY_TYPE_LIST)
    if "other" not in seen:
        cleaned.append("other")
    return cleaned
116+
117+
81118_CONCEPTS_PLAN_USER = """\
82119 Based on the summary above, decide how to update the wiki's CONCEPT pages and
83120ENTITY pages.
207244Return ONLY valid JSON, no fences.
208245"""
209246
210- # Substitute the canonical entity-type list into every prompt that advertises
211- # it, so the prompt text can never drift from ``_ENTITY_TYPES`` validation.
212- _CONCEPTS_PLAN_USER = _CONCEPTS_PLAN_USER .replace ("__ENTITY_TYPES__" , _ENTITY_TYPES_STR )
213- _ENTITY_PAGE_USER = _ENTITY_PAGE_USER .replace ("__ENTITY_TYPES__" , _ENTITY_TYPES_STR )
214- _ENTITY_UPDATE_USER = _ENTITY_UPDATE_USER .replace ("__ENTITY_TYPES__" , _ENTITY_TYPES_STR )
247+ # NOTE: the prompt templates intentionally KEEP the literal ``__ENTITY_TYPES__``
248+ # token at import time. The effective entity-type list is resolved per-compile
249+ # from config (see ``_resolve_entity_types``) and substituted via ``str.replace``
250+ # at call time inside ``_compile_concepts``. This lets ``entity_types:`` in
251+ # ``.openkb/config.yaml`` override the default enum everywhere at once. The
252+ # token is a plain string (not a ``{}`` placeholder) so it does not collide with
253+ # the ``{{ }}`` JSON braces these templates feed to ``str.format``.
215254
216255_SUMMARY_REWRITE_USER = """\
217256 Task: Rewrite the summary you wrote above into a final version that is \
@@ -432,13 +471,19 @@ def _filter_related_slugs(items: list) -> list[str]:
432471 return valid
433472
434473
435- def _filter_entity_items (items : object ) -> list [dict ]:
474+ def _filter_entity_items (
475+ items : object , valid_types : frozenset | None = None
476+ ) -> list [dict ]:
436477 """Validate entity create/update objects: require name+title, coerce type.
437478
438479 Each kept item is normalized to ``{"name", "title", "type"}`` where
439- ``type`` falls back to ``"other"`` when missing or outside the entity
440- enum and ``title`` falls back to ``name``.
480+ ``type`` falls back to ``"other"`` when missing or outside ``valid_types``
481+ and ``title`` falls back to ``name``. ``valid_types`` defaults to the
482+ module-level ``_ENTITY_TYPES`` so callers that don't thread a config-driven
483+ set keep today's behavior.
441484 """
485+ if valid_types is None :
486+ valid_types = _ENTITY_TYPES
442487 out : list [dict ] = []
443488 if not isinstance (items , list ):
444489 return out
@@ -450,13 +495,13 @@ def _filter_entity_items(items: object) -> list[dict]:
450495 continue
451496 title = it .get ("title" ) if isinstance (it .get ("title" ), str ) else name
452497 etype = it .get ("type" )
453- if not isinstance (etype , str ) or etype not in _ENTITY_TYPES :
498+ if not isinstance (etype , str ) or etype not in valid_types :
454499 etype = "other"
455500 out .append ({"name" : name , "title" : title , "type" : etype })
456501 return out
457502
458503
459- def _parse_entities_plan (parsed : object ) -> dict :
504+ def _parse_entities_plan (parsed : object , valid_types : frozenset | None = None ) -> dict :
460505 """Extract the entities group from a plan dict, with graceful fallback.
461506
462507 Returns ``{"create": [...], "update": [...], "related": [...]}``. A
@@ -470,8 +515,8 @@ def _parse_entities_plan(parsed: object) -> dict:
470515 if not isinstance (group , dict ):
471516 return empty
472517 return {
473- "create" : _filter_entity_items (group .get ("create" , [])),
474- "update" : _filter_entity_items (group .get ("update" , [])),
518+ "create" : _filter_entity_items (group .get ("create" , []), valid_types ),
519+ "update" : _filter_entity_items (group .get ("update" , []), valid_types ),
475520 "related" : _filter_related_slugs (group .get ("related" , [])),
476521 }
477522
@@ -1339,6 +1384,7 @@ async def _compile_concepts(
13391384 doc_brief : str = "" ,
13401385 doc_type : str = "short" ,
13411386 rewrite_summary : bool = False ,
1387+ entity_types : list [str ] | None = None ,
13421388) -> None :
13431389 """Shared Steps 2-4: concepts plan → generate/update → index.
13441390
@@ -1351,6 +1397,13 @@ async def _compile_concepts(
13511397 """
13521398 source_file = f"summaries/{ doc_name } .md"
13531399
1400+ # Effective entity types for this compile (config-driven; defaults to the
1401+ # canonical enum when unset, keeping behavior byte-identical to today).
1402+ if entity_types is None :
1403+ entity_types = list (_ENTITY_TYPE_LIST )
1404+ types_str = ", " .join (entity_types )
1405+ valid_types = frozenset (entity_types )
1406+
13541407 # --- Step 2: Get concepts plan (A cached) ---
13551408 concept_briefs = _read_concept_briefs (wiki_dir )
13561409 entity_briefs = _read_entity_briefs (wiki_dir )
@@ -1363,7 +1416,9 @@ async def _compile_concepts(
13631416 system_msg ,
13641417 doc_msg ,
13651418 summary_msg ,
1366- {"role" : "user" , "content" : _CONCEPTS_PLAN_USER .format (
1419+ {"role" : "user" , "content" : _CONCEPTS_PLAN_USER .replace (
1420+ "__ENTITY_TYPES__" , types_str ,
1421+ ).format (
13671422 concept_briefs = concept_briefs ,
13681423 entity_briefs = entity_briefs ,
13691424 )},
@@ -1442,7 +1497,7 @@ def _write_v1_summary_stripped() -> None:
14421497 "update" : _filter_concept_items (concepts_group .get ("update" , []), "update" ),
14431498 "related" : _filter_related_slugs (concepts_group .get ("related" , [])),
14441499 }
1445- entities_plan = _parse_entities_plan (parsed )
1500+ entities_plan = _parse_entities_plan (parsed , valid_types )
14461501
14471502 create_items = plan ["create" ]
14481503 update_items = plan ["update" ]
@@ -1614,14 +1669,16 @@ async def _gen_entity_create(ent: dict) -> tuple[str, str, str, str]:
16141669 doc_msg , # cached (BP1)
16151670 summary_msg , # cached (BP2)
16161671 known_targets_msg , # cached (BP3) — whitelist
1617- {"role" : "user" , "content" : _ENTITY_PAGE_USER .format (
1672+ {"role" : "user" , "content" : _ENTITY_PAGE_USER .replace (
1673+ "__ENTITY_TYPES__" , types_str ,
1674+ ).format (
16181675 title = title , type = etype , doc_name = doc_name ,
16191676 )},
16201677 ], f"entity: { name } " , response_format = _JSON_RESPONSE_FORMAT )
16211678 try :
16221679 parsed = _parse_json (raw )
16231680 brief = parsed .get ("brief" , "" )
1624- etype_out = parsed .get ("type" ) if parsed .get ("type" ) in _ENTITY_TYPES else etype
1681+ etype_out = parsed .get ("type" ) if parsed .get ("type" ) in valid_types else etype
16251682 # Parse succeeded: do NOT fall back to ``raw`` (the JSON string).
16261683 content = parsed .get ("content" ) or ""
16271684 except (json .JSONDecodeError , ValueError ):
@@ -1650,15 +1707,17 @@ async def _gen_entity_update(ent: dict) -> tuple[str, str, str, str]:
16501707 doc_msg , # cached (BP1)
16511708 summary_msg , # cached (BP2)
16521709 known_targets_msg , # cached (BP3) — whitelist
1653- {"role" : "user" , "content" : _ENTITY_UPDATE_USER .format (
1710+ {"role" : "user" , "content" : _ENTITY_UPDATE_USER .replace (
1711+ "__ENTITY_TYPES__" , types_str ,
1712+ ).format (
16541713 title = title , type = etype , doc_name = doc_name ,
16551714 existing_content = existing_content ,
16561715 )},
16571716 ], f"entity-update: { name } " , response_format = _JSON_RESPONSE_FORMAT )
16581717 try :
16591718 parsed = _parse_json (raw )
16601719 brief = parsed .get ("brief" , "" )
1661- etype_out = parsed .get ("type" ) if parsed .get ("type" ) in _ENTITY_TYPES else etype
1720+ etype_out = parsed .get ("type" ) if parsed .get ("type" ) in valid_types else etype
16621721 # Parse succeeded: do NOT fall back to ``raw`` (the JSON string).
16631722 content = parsed .get ("content" ) or ""
16641723 except (json .JSONDecodeError , ValueError ):
@@ -1902,6 +1961,7 @@ async def compile_short_doc(
19021961 openkb_dir = kb_dir / ".openkb"
19031962 config = load_config (openkb_dir / "config.yaml" )
19041963 language : str = config .get ("language" , "en" )
1964+ entity_types = _resolve_entity_types (config )
19051965
19061966 wiki_dir = kb_dir / "wiki"
19071967 schema_md = get_agents_md (wiki_dir )
@@ -1936,7 +1996,7 @@ async def compile_short_doc(
19361996 await _compile_concepts (
19371997 wiki_dir , kb_dir , model , system_msg , doc_msg ,
19381998 summary , doc_name , max_concurrency , doc_brief = doc_brief ,
1939- doc_type = "short" , rewrite_summary = True ,
1999+ doc_type = "short" , rewrite_summary = True , entity_types = entity_types ,
19402000 )
19412001
19422002
@@ -1959,6 +2019,7 @@ async def compile_long_doc(
19592019 openkb_dir = kb_dir / ".openkb"
19602020 config = load_config (openkb_dir / "config.yaml" )
19612021 language : str = config .get ("language" , "en" )
2022+ entity_types = _resolve_entity_types (config )
19622023
19632024 wiki_dir = kb_dir / "wiki"
19642025 schema_md = get_agents_md (wiki_dir )
@@ -1980,5 +2041,5 @@ async def compile_long_doc(
19802041 await _compile_concepts (
19812042 wiki_dir , kb_dir , model , system_msg , doc_msg ,
19822043 overview , doc_name , max_concurrency , doc_brief = doc_description ,
1983- doc_type = "pageindex" ,
2044+ doc_type = "pageindex" , entity_types = entity_types ,
19842045 )
0 commit comments