Skip to content

Commit b245128

Browse files
committed
fix(compiler): keep ## Entities before ## Explorations; drop dead param + overlap gathers
- _update_index: insert ## Entities before ## Explorations on older index.md files that predate the section (via the new _ensure_h2_section_before helper), preserving the canonical order instead of appending at EOF.
- _filter_entity_items: drop the unused 'label' parameter and update its call sites in _parse_entities_plan.
- _compile_concepts: overlap concept and entity generation in one outer asyncio.gather (they share the cached context and the same concurrency semaphore); per-list result/error handling is unchanged.
1 parent 1e2d5e0 commit b245128

1 file changed

Lines changed: 59 additions & 22 deletions

File tree

openkb/agent/compiler.py

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def _filter_related_slugs(items: list) -> list[str]:
432432
return valid
433433

434434

435-
def _filter_entity_items(items: object, label: str) -> list[dict]:
435+
def _filter_entity_items(items: object) -> list[dict]:
436436
"""Validate entity create/update objects: require name+title, coerce type.
437437
438438
Each kept item is normalized to ``{"name", "title", "type"}`` where
@@ -470,8 +470,8 @@ def _parse_entities_plan(parsed: object) -> dict:
470470
if not isinstance(group, dict):
471471
return empty
472472
return {
473-
"create": _filter_entity_items(group.get("create", []), "create"),
474-
"update": _filter_entity_items(group.get("update", []), "update"),
473+
"create": _filter_entity_items(group.get("create", [])),
474+
"update": _filter_entity_items(group.get("update", [])),
475475
"related": _filter_related_slugs(group.get("related", [])),
476476
}
477477

@@ -635,6 +635,33 @@ def _ensure_h2_section(lines: list[str], heading: str) -> None:
635635
lines.append("")
636636

637637

638+
def _ensure_h2_section_before(
639+
lines: list[str], heading: str, before: str,
640+
) -> None:
641+
"""Ensure H2 ``heading`` exists, inserting it just before ``before``.
642+
643+
If ``heading`` is already present, no-op. If ``before`` is absent, fall
644+
back to :func:`_ensure_h2_section` (append at end). This keeps the
645+
canonical index order (e.g. ``## Entities`` ahead of ``## Explorations``)
646+
when recovering an older index.md that predates the section.
647+
"""
648+
if _get_section_bounds(lines, heading) is not None:
649+
return
650+
before_bounds = _get_section_bounds(lines, before)
651+
if before_bounds is None:
652+
_ensure_h2_section(lines, heading)
653+
return
654+
# ``before_bounds[0]`` is the section's ``start``, i.e. the line after the ``before`` heading, so
655+
# ``start - 1`` is the heading line itself; insert the new section (heading + blank line) right before it.
656+
insert_at = before_bounds[0] - 1
657+
logger.warning(
658+
"Wiki index is missing %r section; inserting it before %r. "
659+
"Check whether the file was hand-edited away from the canonical layout.",
660+
heading, before,
661+
)
662+
lines[insert_at:insert_at] = [heading, ""]
663+
664+
638665
def _section_contains_link(lines: list[str], heading: str, link: str) -> bool:
639666
"""Check whether an index entry already exists inside the named section."""
640667
bounds = _get_section_bounds(lines, heading)
@@ -1253,7 +1280,10 @@ def _update_index(
12531280
entity_names = entity_names or []
12541281
entity_meta = entity_meta or {}
12551282
if entity_names:
1256-
_ensure_h2_section(lines, "## Entities")
1283+
# Keep canonical order: Entities sits before Explorations. On an older
1284+
# index.md that predates the Entities section, plain ``_ensure_h2_section``
1285+
# would append it after Explorations.
1286+
_ensure_h2_section_before(lines, "## Entities", "## Explorations")
12571287
for name in entity_names:
12581288
link = f"[[entities/{name}]]"
12591289
# Callers always populate entity_meta alongside entity_names; the
@@ -1612,20 +1642,43 @@ async def _gen_entity_update(ent: dict) -> tuple[str, str, str, str]:
16121642
tasks.extend(_gen_create(c) for c in create_items)
16131643
tasks.extend(_gen_update(c) for c in update_items)
16141644

1645+
# --- Step 3 (entities): build the entity task list up front so it can be
1646+
# gathered concurrently with the concept tasks below. Entity coroutines
1647+
# return 4-arity tuples (name, content, brief, type), so their results are
1648+
# processed in their own loop rather than mixed with the concept tuples.
1649+
entity_tasks = []
1650+
entity_tasks.extend(_gen_entity_create(e) for e in entity_create)
1651+
entity_tasks.extend(_gen_entity_update(e) for e in entity_update)
1652+
16151653
concept_names: list[str] = []
16161654
concept_briefs_map: dict[str, str] = {}
16171655
pending_writes: list[tuple[str, str, bool, str]] = []
16181656
entity_names: list[str] = []
16191657
entity_meta: dict[str, tuple[str, str]] = {}
16201658
entity_pending: list[tuple[str, str, str, str]] = []
16211659

1660+
# Concepts and entities are independent and share the cached prompt
1661+
# context + the same concurrency ``semaphore``, so overlap them in one
1662+
# outer gather instead of running entities only after concepts finish.
1663+
total = len(tasks)
1664+
etotal = len(entity_tasks)
16221665
if tasks:
1623-
total = len(tasks)
16241666
sys.stdout.write(f" Generating {total} concept(s) (concurrency={max_concurrency})...\n")
16251667
sys.stdout.flush()
1668+
if entity_tasks:
1669+
sys.stdout.write(
1670+
f" Generating {etotal} entity(ies) (concurrency={max_concurrency})...\n"
1671+
)
1672+
sys.stdout.flush()
16261673

1627-
results = await asyncio.gather(*tasks, return_exceptions=True)
1674+
results, entity_results = ([], [])
1675+
if tasks or entity_tasks:
1676+
results, entity_results = await asyncio.gather(
1677+
asyncio.gather(*tasks, return_exceptions=True),
1678+
asyncio.gather(*entity_tasks, return_exceptions=True),
1679+
)
16281680

1681+
if tasks:
16291682
failure_types: list[str] = []
16301683
for r in results:
16311684
if isinstance(r, Exception):
@@ -1653,23 +1706,7 @@ async def _gen_entity_update(ent: dict) -> tuple[str, str, str, str]:
16531706
)
16541707
sys.stdout.flush()
16551708

1656-
# --- Step 3 (entities): generate entity pages in their OWN gather ---
1657-
# Entity coroutines return 4-arity tuples (name, content, brief, type),
1658-
# so they are gathered separately from the concept tuples rather than
1659-
# mixed into one list with differing arities.
1660-
entity_tasks = []
1661-
entity_tasks.extend(_gen_entity_create(e) for e in entity_create)
1662-
entity_tasks.extend(_gen_entity_update(e) for e in entity_update)
1663-
16641709
if entity_tasks:
1665-
etotal = len(entity_tasks)
1666-
sys.stdout.write(
1667-
f" Generating {etotal} entity(ies) (concurrency={max_concurrency})...\n"
1668-
)
1669-
sys.stdout.flush()
1670-
1671-
entity_results = await asyncio.gather(*entity_tasks, return_exceptions=True)
1672-
16731710
entity_failure_types: list[str] = []
16741711
for r in entity_results:
16751712
if isinstance(r, Exception):

0 commit comments

Comments
 (0)