From d4ad9435d67de388684a1f79d00843e2d8da7efa Mon Sep 17 00:00:00 2001
From: Bryce <bryce@apecloud.com>
Date: Mon, 27 Apr 2026 18:03:57 +0800
Subject: [PATCH 01/13] =?UTF-8?q?feat(celery=20Wave=205=20P1=20chunk=201):?=
 =?UTF-8?q?=20=C2=A7K.9=20spec=20+=20aperag/indexing/llm.py=20relocate=20(?=
 =?UTF-8?q?legacy=20graphindex=20elimination=20first=20chunk)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 task #26 first chunk per architect msg=10b5fae6 §K.9 spec
amendment + PM msg=af82797e dispatch (single Wave 5 PR with 5-phase
chunked-rotation commits, big-PR fast-landing per earayu2 msg=eced858d).

Two changes bundled (per PM msg=af82797e "Phase 1 first commit =
§K.9 spec paste + legacy graphindex elimination 实现起步"):

1. **§K.9 Wave 5 spec section** added to design pack:
   - 5-phase commit roadmap (16 acceptance items)
   - production-readiness 三类 layer per phase
   - architect direct ratify lane scope per phase
   - pre-check pattern lock per phase (per
     ``feedback_spec_lock_grep_verify_caller.md`` 双 pattern)
   - Layer 1 same-session continuation directive (per
     ``feedback_no_refresh_complete_all_tasks.md``)
   - Wave 5 acceptance summary (16 items + owner table)

2. **`aperag/indexing/llm.py` relocate** (§K.9.1 acceptance item 3):
   - new module owns canonical ``build_collection_llm_callable`` +
     ``render_extraction_prompt`` + ``ENTITY_RELATION_EXTRACTION``
     template + ``LLMCall`` type alias
   - legacy ``aperag/domains/knowledge_graph/graphindex/integration.py``
     and ``prompts.py`` re-export from new location during Wave 5
     deprecation window so legacy retrieval/curation callers keep
     working until Phase 1 close-out
   - ``aperag/indexing/graph_extractor.py`` updated to import from
     new ``aperag.indexing.llm`` module directly (instead of legacy
     graphindex bridge)
   - test ``test_graph_extractor.py`` monkey-patch sites updated to
     target ``aperag.indexing.llm`` module

Pre-check pattern 1 grep-verify caller cascade (per architect msg=
b26f64b2 chunk 4d Option C ruling, used as reference list for
Phase 1 migration scope):

- ``aperag/indexing/`` → 0 cross-references to legacy
  ``graphindex/storage/*`` (chunk 4d narrowed scope invariant
  maintained ✅)
- legacy ``graphindex/integration.py`` and ``prompts.py`` callers:
  ``retrieval/pipeline.py:85`` / ``knowledge_graph/service.py:69+`` /
  ``graph_curation/service.py:37`` / ``graph_curation/integration.py:21``
  / ``service/prompt_template_service.py:161`` —
  remaining migrations land in subsequent Phase 1 commits

Local gates green:
  - ``ruff check ./aperag ./tests`` clean
  - ``ruff format --check ./aperag ./tests`` clean (509 files)
  - ``pytest test_graph_extractor.py + test_full_indexing_pipeline.py
    + test_worker_factory.py + tests/unit_test/indexing/``: 179 passed
    / 2 skipped (Layer 2 stubs)

Production-readiness three-class layer (Phase 1 partial):
  - must-be-real: ``aperag.indexing.llm`` module is the canonical
    production home for the relocated LLM helpers (no import
    indirection through legacy package once retrieval/curation
    migration completes)
  - may-be-gated: legacy ``graphindex/{integration,prompts}.py``
    re-export shims kept active during Wave 5 deprecation window so
    cross-cutting refactor can land incrementally without breaking
    legacy retrieval/curation flow
  - fully-resolves: §K.9.1 Wave 5 acceptance item 3 (`aperag/indexing/llm.py`
    relocate); items 1 (legacy graphindex package elimination) and
    rest of Phase 1 cascade migration land in subsequent commits

Next Phase 1 commits: migrate ``retrieval/pipeline.py`` /
``knowledge_graph/service.py`` / ``graph_curation/*`` to §G.5 read
primitives; once all callers migrated → delete legacy
``graphindex/{storage, service, integration, engine, __init__}.py``
+ delete legacy tests; final commit verifies grep-zero invariant.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../knowledge_graph/graphindex/integration.py |  58 +----
 .../knowledge_graph/graphindex/prompts.py     | 106 +-------
 aperag/indexing/graph_extractor.py            |   8 +-
 aperag/indexing/llm.py                        | 233 ++++++++++++++++++
 .../indexing-redesign-design-pack.md          | 144 +++++++++++
 tests/integration/test_graph_extractor.py     |   4 +-
 6 files changed, 394 insertions(+), 159 deletions(-)
 create mode 100644 aperag/indexing/llm.py

diff --git a/aperag/domains/knowledge_graph/graphindex/integration.py b/aperag/domains/knowledge_graph/graphindex/integration.py
index 80cfd3aad..fa5622396 100644
--- a/aperag/domains/knowledge_graph/graphindex/integration.py
+++ b/aperag/domains/knowledge_graph/graphindex/integration.py
@@ -39,7 +39,6 @@
 from typing import Any, Awaitable, Optional
 
 from aperag.config import async_engine, build_graph_db_context, settings
-from aperag.db.ops import db_ops
 from aperag.domains.knowledge_graph.graphindex.config import GraphIndexConfig
 from aperag.domains.knowledge_graph.graphindex.dto import (
     DeleteDocumentResult,
@@ -48,7 +47,13 @@
 from aperag.domains.knowledge_graph.graphindex.service import GraphIndexService
 from aperag.domains.knowledge_graph.graphindex.storage.connector import GraphStoreAdaptor
 from aperag.domains.knowledge_graph.ports import CollectionRow
-from aperag.schema.utils import parseCollectionConfig
+
+# Wave 5 P1 chunk 1 (`aperag/indexing/llm.py` relocate per §K.9.1 item 3):
+# the canonical home for ``build_collection_llm_callable`` is now
+# :mod:`aperag.indexing.llm`. Re-exported here so legacy callers that
+# still import from this module keep working during the Wave 5
+# deprecation window.
+from aperag.indexing.llm import build_collection_llm_callable  # noqa: F401
 
 logger = logging.getLogger(__name__)
 
@@ -91,55 +96,6 @@ def _build_store():
 # ---------------------------------------------------------------------------
 
 
-def build_collection_llm_callable(collection: CollectionRow):
-    """Construct the ``LLMCall`` for a specific collection.
-
-    Reads the collection's completion config (provider, model, base_url,
-    api key from the user's provider record) and returns an async
-    function ``(prompt) -> str``. The function is closure-bound to the
-    config so multiple concurrent calls can share it safely without
-    serialising on a global LLM client.
-
-    The ``CompletionService`` is instantiated once per callable and
-    reused across every chunk extraction for the same document. Before
-    this, each per-chunk call rebuilt the client (litellm import, HTTP
-    session, etc.) — a meaningful overhead on high-chunk documents.
-    """
-    config = parseCollectionConfig(collection.config)
-    if not config.completion or not config.completion.model_id:
-        raise RuntimeError(f"graphindex: completion model not configured (collection {collection.id})")
-    row = db_ops.query_model_runtime(config.completion.model_id, collection.user)
-    if not row:
-        raise RuntimeError(f"graphindex: model not found {config.completion.model_id!r} (collection {collection.id})")
-    model, account = row
-
-    # Local import: CompletionService pulls in litellm which is heavy at
-    # import time and we don't want to pay it just for ``import aperag.domains.knowledge_graph.graphindex``.
-    from aperag.llm.completion.completion_service import CompletionService
-    from aperag.llm.runtime.resolver import resolve_model_invocation_from_records
-
-    invocation = resolve_model_invocation_from_records(model=model, account=account)
-    provider = invocation.runner_config.get("provider")
-    if not provider:
-        provider = "openai" if invocation.runner_type == "openai_compatible" else invocation.provider_type
-
-    svc = CompletionService(
-        provider=provider,
-        model=invocation.provider_model_id,
-        base_url=invocation.base_url,
-        api_key=invocation.api_key,
-        temperature=0.0,  # deterministic output for extraction
-        max_tokens=None,
-        caching=False,
-    )
-
-    async def _llm(prompt: str) -> str:
-        # No history, no images, no memory. Single-turn JSON request.
-        return await svc.agenerate(history=[], prompt=prompt, images=[], memory=False)
-
-    return _llm
-
-
 def build_collection_embed_callables(collection: CollectionRow):
     """Build the ``embed_query`` and ``embed_texts`` callables for
     vector-based entity/relation recall.
diff --git a/aperag/domains/knowledge_graph/graphindex/prompts.py b/aperag/domains/knowledge_graph/graphindex/prompts.py
index f86b5a172..84cdaa068 100644
--- a/aperag/domains/knowledge_graph/graphindex/prompts.py
+++ b/aperag/domains/knowledge_graph/graphindex/prompts.py
@@ -35,106 +35,12 @@
 
 from __future__ import annotations
 
-ENTITY_RELATION_EXTRACTION: str = """\
-You are an information-extraction assistant building a knowledge graph.
-
-Read the TEXT below and return a single JSON object with two arrays:
-``entities`` and ``relations``. Do not output anything outside the JSON
-object.
-
-**Rules**
-
-1. Output language: {language}. Names keep their original script and
-   case (e.g. English names capitalized, Chinese names unchanged).
-2. Use only these entity types: {entity_types}. If no provided type
-   fits, skip the entity rather than invent a new type.
-3. Every entity needs a short, self-contained description in
-   {language}. Do not add information that isn't in the text.
-4. Every relation must reference entities by the exact ``name`` you put
-   in the ``entities`` list. No self-loops (source != target).
-5. ``weight`` is an integer 1-10 expressing how strongly the text
-   supports the relation; default to 5 when unsure.
-6. Cap: at most {max_entities} entities and {max_relations} relations.
-   If the text contains more, prefer the most specific and the
-   most-mentioned.
-7. If the text has no extractable entities, return
-   ``{{"entities": [], "relations": []}}``.
-
-**JSON schema**
-
-```
-{{
-  "entities": [
-    {{"name": "<string>", "type": "<one of the allowed types>",
-      "description": "<string>"}}
-  ],
-  "relations": [
-    {{"source": "<entity name>", "target": "<entity name>",
-      "description": "<string>", "weight": <int 1-10>}}
-  ]
-}}
-```
-
-**Example** (English, types=[person, organization]):
-
-Text:
-```
-Alice, a researcher at Acme Labs, collaborated with Bob on the project.
-```
-
-Output:
-```
-{{
-  "entities": [
-    {{"name": "Alice", "type": "person",
-      "description": "A researcher at Acme Labs."}},
-    {{"name": "Bob", "type": "person",
-      "description": "A collaborator on the project."}},
-    {{"name": "Acme Labs", "type": "organization",
-      "description": "Research organization where Alice works."}}
-  ],
-  "relations": [
-    {{"source": "Alice", "target": "Bob",
-      "description": "Alice and Bob collaborated on a project.",
-      "weight": 7}},
-    {{"source": "Alice", "target": "Acme Labs",
-      "description": "Alice is a researcher employed by Acme Labs.",
-      "weight": 8}}
-  ]
-}}
-```
-
----
-
-TEXT:
-```
-{input_text}
-```
-
-Output (JSON only):"""
-
-
-def render_extraction_prompt(
-    *,
-    input_text: str,
-    entity_types: list[str] | tuple[str, ...],
-    language: str,
-    max_entities: int,
-    max_relations: int,
-) -> str:
-    """Fill in the extraction prompt template for one chunk batch.
-
-    Kept as a simple function so tests can snapshot the exact rendered
-    string and catch accidental prompt regressions.
-    """
-    return ENTITY_RELATION_EXTRACTION.format(
-        input_text=input_text,
-        entity_types=", ".join(entity_types),
-        language=language,
-        max_entities=max_entities,
-        max_relations=max_relations,
-    )
-
+# Wave 5 P1 chunk 1 (`aperag/indexing/llm.py` relocate per §K.9.1 item 3):
+# ``ENTITY_RELATION_EXTRACTION`` + ``render_extraction_prompt`` 已 relocate
+# 到 :mod:`aperag.indexing.llm`. 这里 re-export 保持 legacy callers 跨
+# Wave 5 deprecation window 不破。一旦 retrieval/curation Phase 1
+# close-out，整个 legacy module 删除。
+from aperag.indexing.llm import ENTITY_RELATION_EXTRACTION, render_extraction_prompt  # noqa: F401
 
 DESCRIPTION_SUMMARIZATION: str = """\
 You are consolidating a knowledge-graph entry.
diff --git a/aperag/indexing/graph_extractor.py b/aperag/indexing/graph_extractor.py
index de0893eb4..c7bd21dfd 100644
--- a/aperag/indexing/graph_extractor.py
+++ b/aperag/indexing/graph_extractor.py
@@ -99,9 +99,7 @@ def build_collection_graph_extractor(collection: Any) -> GraphExtractor:
     FAILED with the message rather than silently building a worker
     that will fail at dispatch time.
     """
-    from aperag.domains.knowledge_graph.graphindex.integration import (
-        build_collection_llm_callable,
-    )
+    from aperag.indexing.llm import build_collection_llm_callable
     from aperag.indexing.worker_factory import WorkerFactoryError
 
     try:
@@ -179,9 +177,7 @@ async def _extract_one_chunk(
     timeout we propagate :class:`asyncio.TimeoutError` to the caller
     which already logs + skips the chunk.
     """
-    from aperag.domains.knowledge_graph.graphindex.prompts import (
-        render_extraction_prompt,
-    )
+    from aperag.indexing.llm import render_extraction_prompt
 
     prompt = render_extraction_prompt(
         input_text=text,
diff --git a/aperag/indexing/llm.py b/aperag/indexing/llm.py
new file mode 100644
index 000000000..e82cf3add
--- /dev/null
+++ b/aperag/indexing/llm.py
@@ -0,0 +1,233 @@
+# Copyright 2025 ApeCloud, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""LLM helpers for the new indexing pipeline — Wave 5 P1 chunk 1.
+
+Relocates two helpers out of the legacy ``graphindex/`` package so the
+new indexing pipeline (``aperag/indexing/``) does not depend on legacy
+package paths. Per architect msg=87e2b187 chunk 4d Option C ruling +
+Wave 5 task #26 spec — the legacy ``aperag/domains/knowledge_graph/
+graphindex/`` package is being phased out as a coordinated batch in
+Wave 5; this module is the new home for the two LLM-callable + prompt-
+template helpers that the new ``aperag.indexing.graph_extractor``
+already consumes.
+
+Public surface:
+
+* :func:`build_collection_llm_callable` — read the collection's
+  completion config and return an async ``(prompt) -> str`` closure
+  bound to a single :class:`CompletionService` instance. The
+  closure is reusable across many extractor calls for the same
+  collection so we pay the LiteLLM import + HTTP session setup only
+  once per collection per process.
+* :func:`render_extraction_prompt` — fill in the canonical entity /
+  relation extraction prompt template for one chunk batch. Kept as a
+  separate function so tests can snapshot the rendered string and
+  catch accidental prompt regressions.
+
+The legacy modules (``graphindex.integration.build_collection_llm_callable``
+and ``graphindex.prompts.render_extraction_prompt``) re-export from this
+module during the Wave 5 deprecation window so external callers (legacy
+graphindex retrieval / curation paths) keep working until the cross-
+cutting refactor lands the migration. Once those callers move to the
+§G.5 read primitives, the legacy modules can be deleted with no
+behavioural change.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Awaitable, Callable
+
+from aperag.db.ops import db_ops
+from aperag.domains.knowledge_graph.ports import CollectionRow
+from aperag.schema.utils import parseCollectionConfig
+
+logger = logging.getLogger(__name__)
+
+
+LLMCall = Callable[[str], Awaitable[str]]
+"""Async callable returning the model's response to a single prompt.
+The new indexing pipeline's graph extractor consumes this exact shape
+(``aperag.indexing.graph_extractor`` line ~165)."""
+
+
+def build_collection_llm_callable(collection: CollectionRow) -> LLMCall:
+    """Construct the per-collection async LLM callable.
+
+    Reads the collection's completion config (provider, model, base_url,
+    api key from the user's provider record) and returns an async
+    ``(prompt) -> str`` closure. The closure is bound to a single
+    :class:`CompletionService` instance so concurrent extractor calls
+    against the same collection share the underlying client without
+    paying the LiteLLM import + HTTP session setup per chunk.
+
+    Raises ``RuntimeError`` if the collection has no completion model
+    configured or the configured model cannot be resolved against the
+    runtime registry. The :class:`aperag.indexing.graph_extractor`
+    builder catches this and wraps it in
+    :class:`aperag.indexing.worker_factory.WorkerFactoryError` so the
+    orchestrator finalises the row FAILED with operator-facing
+    diagnostics (Wave 3 lesson #10 explicit-fail-not-silent pattern).
+    """
+    config = parseCollectionConfig(collection.config)
+    if not config.completion or not config.completion.model_id:
+        raise RuntimeError(f"indexing.llm: completion model not configured (collection {collection.id})")
+    row = db_ops.query_model_runtime(config.completion.model_id, collection.user)
+    if not row:
+        raise RuntimeError(f"indexing.llm: model not found {config.completion.model_id!r} (collection {collection.id})")
+    model, account = row
+
+    # Local import: CompletionService pulls in litellm which is heavy
+    # at import time and we don't want to pay it just for
+    # ``import aperag.indexing.llm``.
+    from aperag.llm.completion.completion_service import CompletionService
+    from aperag.llm.runtime.resolver import resolve_model_invocation_from_records
+
+    invocation = resolve_model_invocation_from_records(model=model, account=account)
+    provider = invocation.runner_config.get("provider")
+    if not provider:
+        provider = "openai" if invocation.runner_type == "openai_compatible" else invocation.provider_type
+
+    svc = CompletionService(
+        provider=provider,
+        model=invocation.provider_model_id,
+        base_url=invocation.base_url,
+        api_key=invocation.api_key,
+        temperature=0.0,  # deterministic output for extraction
+        max_tokens=None,
+        caching=False,
+    )
+
+    async def _llm(prompt: str) -> str:
+        # No history, no images, no memory. Single-turn JSON request.
+        return await svc.agenerate(history=[], prompt=prompt, images=[], memory=False)
+
+    return _llm
+
+
+# ---------------------------------------------------------------------
+# Prompt templates — relocated from ``graphindex.prompts``.
+# ---------------------------------------------------------------------
+
+
+ENTITY_RELATION_EXTRACTION: str = """\
+You are an information-extraction assistant building a knowledge graph.
+
+Read the TEXT below and return a single JSON object with two arrays:
+``entities`` and ``relations``. Do not output anything outside the JSON
+object.
+
+**Rules**
+
+1. Output language: {language}. Names keep their original script and
+   case (e.g. English names capitalized, Chinese names unchanged).
+2. Use only these entity types: {entity_types}. If no provided type
+   fits, skip the entity rather than invent a new type.
+3. Every entity needs a short, self-contained description in
+   {language}. Do not add information that isn't in the text.
+4. Every relation must reference entities by the exact ``name`` you put
+   in the ``entities`` list. No self-loops (source != target).
+5. ``weight`` is an integer 1-10 expressing how strongly the text
+   supports the relation; default to 5 when unsure.
+6. Cap: at most {max_entities} entities and {max_relations} relations.
+   If the text contains more, prefer the most specific and the
+   most-mentioned.
+7. If the text has no extractable entities, return
+   ``{{"entities": [], "relations": []}}``.
+
+**JSON schema**
+
+```
+{{
+  "entities": [
+    {{"name": "<string>", "type": "<one of the allowed types>",
+      "description": "<string>"}}
+  ],
+  "relations": [
+    {{"source": "<entity name>", "target": "<entity name>",
+      "description": "<string>", "weight": <int 1-10>}}
+  ]
+}}
+```
+
+**Example** (English, types=[person, organization]):
+
+Text:
+```
+Alice, a researcher at Acme Labs, collaborated with Bob on the project.
+```
+
+Output:
+```
+{{
+  "entities": [
+    {{"name": "Alice", "type": "person",
+      "description": "A researcher at Acme Labs."}},
+    {{"name": "Bob", "type": "person",
+      "description": "A collaborator on the project."}},
+    {{"name": "Acme Labs", "type": "organization",
+      "description": "Research organization where Alice works."}}
+  ],
+  "relations": [
+    {{"source": "Alice", "target": "Bob",
+      "description": "Alice and Bob collaborated on a project.",
+      "weight": 7}},
+    {{"source": "Alice", "target": "Acme Labs",
+      "description": "Alice is a researcher employed by Acme Labs.",
+      "weight": 8}}
+  ]
+}}
+```
+
+---
+
+TEXT:
+```
+{input_text}
+```
+
+Output (JSON only):"""
+
+
+def render_extraction_prompt(
+    *,
+    input_text: str,
+    entity_types: list[str] | tuple[str, ...],
+    language: str,
+    max_entities: int,
+    max_relations: int,
+) -> str:
+    """Fill in the extraction prompt template for one chunk.
+
+    Kept as a simple function so tests can snapshot the exact rendered
+    string and catch accidental prompt regressions. The template is
+    identical to the legacy ``graphindex.prompts.ENTITY_RELATION_EXTRACTION``
+    text (relocated here verbatim during Wave 5 P1).
+    """
+    return ENTITY_RELATION_EXTRACTION.format(
+        input_text=input_text,
+        entity_types=", ".join(entity_types),
+        language=language,
+        max_entities=max_entities,
+        max_relations=max_relations,
+    )
+
+
+__all__ = [
+    "ENTITY_RELATION_EXTRACTION",
+    "LLMCall",
+    "build_collection_llm_callable",
+    "render_extraction_prompt",
+]
diff --git a/docs/modularization/indexing-redesign-design-pack.md b/docs/modularization/indexing-redesign-design-pack.md
index 26f94fa83..20d28b20e 100644
--- a/docs/modularization/indexing-redesign-design-pack.md
+++ b/docs/modularization/indexing-redesign-design-pack.md
@@ -1614,6 +1614,150 @@ v2 把 v1 的 7 个细粒度 PR 收成 **3 个 wave**（≈3 个大 PR）。每
 - W5 cleanup builder share helpers with dispatch builders (T2 obs B drift risk)
 - W5 e2e-http-compose lane stub model-provider fixture + Layer 2 full-pipeline test activation (chunk 4e Layer 2 tests are currently `pytest.skip(...)` stubs because vector/fulltext/summary embedders need a configured model-provider that local dev does not have; the e2e-http-compose lane has the scaffolding but the stub fixture must be wired so Layer 2 can run for real instead of skip — per architect msg=87e2b187 chunk 4d/4e ratify decision condition #3)
 
+### K.9. Wave 5 — Indexing program close-out + 16 backlog items（per architect msg=11442bbf reframe + earayu2 msg=eced858d "大PR 快速落地" directive）
+
+**目标**: 把 Wave 4 close-out 后剩余 16 backlog items 一次性 implement，single Wave 5 PR with 5-phase chunked-rotation commits。Wave 4 同款 batch pattern (PR #1731 推 11 items)。
+
+**5-phase commit roadmap (16 acceptance items)**:
+
+#### Phase 1 (sequential first) — Legacy graphindex package 整体淘汰
+
+- delete `aperag/domains/knowledge_graph/graphindex/{storage, service, integration, engine, __init__}.py`（per chunk 4d Option C ruling msg=b26f64b2 + §K.8 deferral）
+- migrate retrieval/curation 调用 → §G.5 read primitives:
+  - `aperag/domains/retrieval/pipeline.py:85` `make_service_for_collection`
+  - `aperag/domains/knowledge_graph/service.py:69+` graph CRUD
+  - `aperag/graph_curation/service.py:37` + `integration.py:21`
+  - `aperag/indexing/worker_factory.py:696` `build_collection_llm_callable`
+- relocate `build_collection_llm_callable` + `render_extraction_prompt` → 新 `aperag/indexing/llm.py`
+- delete tests: `tests/unit_test/graphindex/test_connector.py` / `test_nebula_store.py` / `tests/integration/compat/test_graph_compat.py`
+
+**production-readiness 三类 layer**:
+- must-be-real: §G.5 read primitives 真 wire retrieval/curation flow（not stubs）
+- may-be-gated: 无 (full hard-cut)
+- fully-resolves: §K.8 Wave 5 backlog item 1 (legacy graphindex elimination) + item 3 (`aperag/indexing/llm.py` relocate)
+
+**pre-check pattern** (per `feedback_spec_lock_grep_verify_caller.md` Pattern 1):
+- grep `from aperag.domains.knowledge_graph.graphindex` 全验 caller cascade migration completeness
+- grep-zero verify post-migration: `aperag/indexing/*` 不 import legacy graphindex (chunk 4d invariant maintained)
+
+**architect direct ratify scope**: caller migration completeness verify (post-Phase-1 grep-zero check); `aperag/indexing/llm.py` API surface align Wave 4 import path expectations。
+
+#### Phase 2 (post-Phase-1) — T7 multimodal vision-LLM 3-item bundle
+
+per §G.2.5.1 spec amendment (Wave 4 chunk 4e follow-up `da576f62`):
+
+- **item 1**: `aperag/llm/embed/embedding_service.py` 加 `embed_image(image_bytes: bytes, alt_text: str = "") -> list[float]` API surface; provider implementation 通过 v3 router resolve
+- **item 2**: parser pipeline 加 image extraction → write `derived/parse_<v>/vision/images/<image_id>.<ext>` (per-page image bytes for PDF / single-image-input passthrough); 同 batch 实施 chunk_id schema unification (per huangheng T1 obs B)
+- **item 3**: `aperag/domains/model_platform/api/providers_v3_routes.py` 加 multimodal capability flag (`is_multimodal=True` source of truth); operator can set on collection embedder spec
+- chunk 4b vision gate **self-disables** when `is_multimodal()` flips True + provider flag set
+
+**production-readiness 三类**:
+- must-be-real: real multimodal LLM provider integration (CLIP / LLaVA / GPT-4V / etc. via v3 router)
+- may-be-gated: 无 (full implementation lands)
+- fully-resolves: §K.8 Wave 5 backlog item 4 (T7 multimodal full impl 3-item bundle)
+
+**pre-check** (per Pattern 2): grep `embed_image` / `is_multimodal` 全 verify 现 code binding (chunk 4b gate `is_multimodal()` call site already exists)
+
+**architect direct ratify scope**: §G.2.5.1 spec ↔ implementation align (3-item bundle full impl) + chunk 4b vision gate self-disable verify (`is_multimodal=True` 实测 + Layer 1 test rename `test_phase1_vision_modality_*` 改 expect ACTIVE)
+
+#### Phase 3 (parallel with Phase 2) — Layer 2 e2e fixture wiring + sweep D activation
+
+per chunk 4d+4e ratify msg=c279a0ff + sweep D Layer 2 stub (`4d36c7fb`):
+
+- e2e-http-compose lane stub model-provider fixture wire
+- activate `test_phase1_full_pipeline_vector_fulltext_summary_active_graph_vision_failed` Layer 2 stub
+- activate `test_phase1_multi_keyword_fulltext_search_returns_hits` Layer 2 stub (sweep D minimum_should_match real verify)
+
+**production-readiness 三类**:
+- must-be-real: real Postgres / Redis / Qdrant / ES / OTel SDK + stub model-provider fixture
+- may-be-gated: 无 (Layer 2 真激活)
+- fully-resolves: §K.8 Wave 5 backlog item 2 (Layer 2 e2e fixture wiring)
+
+**architect direct ratify scope**: Layer 2 fixture wiring canonical contract (vector+fulltext+summary ACTIVE + chunk 4b vision gate FAILED in Phase 2 deferred state until Phase 2 lands flips to ACTIVE) + delete+cleanup roundtrip backend artefact 0-count verify
+
+#### Phase 4 (parallel with Phase 1) — P1 production robustness 3-pack
+
+3 commits in same phase batch:
+
+1. **T2 cleanup `_resolve_cleanup_worker` transient-vs-intentional error split** (per T2 architect ratify msg=6aa8ca88):
+   - `WorkerFactoryError` (intentional gate, drop row) vs 其他 `Exception` (transient, NOT drop row, retry next cycle)
+2. **T3 chunk 2 parse_orchestrator parse_version short-circuit** (per huangheng T3 chunk 2 obs B):
+   - `if derived/parse_<v>/chunks.jsonl 已存在 → skip DocParser，直接 dispatch`
+3. **T2 reconciler "document N min 无 document_index rows → re-enqueue parse"** (per huangheng T3 chunk 2 obs A):
+   - cleanup loop 加 stuck document detection + parse re-enqueue
+
+**production-readiness 三类**:
+- must-be-real: 3 production fault-tolerance fixes (real e2e flow benefit)
+- may-be-gated: 无
+- fully-resolves: §K.8 Wave 5 backlog items 7 + 9 + 10
+
+#### Phase 5A (post-Phase-4, Bryce batch) — P2 batch graph + perf polish
+
+5 commits in same phase batch (Bryce ownership per Wave 4 chunk 4 / T1 / T8 expertise):
+
+1. **T1 graph extractor per-collection config tunability** (per huangheng T1 obs A): `collection.config.knowledge_graph_config.{per_chunk_timeout, max_entities_per_chunk, max_relations_per_chunk}` per-collection override
+2. **chunk 4b `_no_op_extractor` identity check → attribute marker** (per huangheng chunk 4b obs B): `getattr(extractor, 'is_no_op', False)` more robust pattern
+3. **W5-perf-graph-lineage parallel-list O(N) alternative encoding** for high-cardinality entities (>10k docs/entity, per architect msg=39a74026)
+4. **W5-neo4j-label-namespace prefix `aperag_LineageEntity` / `aperag_LineageRelation`** to avoid user-namespace collision
+5. **W5-cypher-type-keyword rename `n.type` property** (Cypher `TYPE()` keyword shadow); cross-backend rename also in Postgres/Nebula
+
+**fully-resolves**: §K.8 Wave 5 backlog items 5 + 12 (其中 12 fold 进 PR-A T7 multimodal 实施 if applicable)
+
+#### Phase 5B (post-Phase-4, chenyexuan batch) — P2 batch infra + cleanup polish
+
+5 commits in same phase batch (chenyexuan ownership per Wave 4 T2 / T3 / T9 expertise):
+
+1. **T1 chunk_id schema unification on parser layer** (per huangheng T1 obs B; partially overlap with Phase 2 item 2)
+2. **T2 cleanup builder share helper with dispatch builders** (per huangheng T2 obs B drift risk refactor)
+3. **T3 chunk 2 `tenant_scope_key` org-prefix forward-compat** (per T3 chunk 2 obs C)
+4. **chunk 4a `utc_now` Python default vs `CURRENT_TIMESTAMP` server_default unification** (per huangheng chunk 4a obs A)
+5. **W5-otlp-config-cross-check** lifespan startup cross-check `INDEXING_METRICS_EMITTER=otlp` ⇔ `APERAG_OBSERVABILITY_MODE=otlp` (per huangheng T6 chunk 1 obs)
+
+**fully-resolves**: §K.8 Wave 5 backlog items 6 + 8 + 11 + remaining accumulated obs
+
+#### K.9.1. Wave 5 acceptance summary (16 items)
+
+| # | Item | Phase | Owner |
+|---|---|---|---|
+| 1 | Legacy graphindex package 整体淘汰 | 1 | Bryce |
+| 2 | Layer 2 e2e fixture wiring | 3 | chenyexuan |
+| 3 | `aperag/indexing/llm.py` relocate | 1 | Bryce |
+| 4 | T7 multimodal vision-LLM 3-item bundle | 2 | Bryce |
+| 5 | T1 graph extractor per-collection config tunability | 5A | Bryce |
+| 6 | T1 chunk_id schema unification | 2 / 5B | Bryce / chenyexuan |
+| 7 | T2 cleanup transient-vs-intentional error split | 4 | chenyexuan |
+| 8 | T2 cleanup builder share helper | 5B | chenyexuan |
+| 9 | T2 reconciler stuck-document re-enqueue | 4 | chenyexuan |
+| 10 | T3 chunk 2 parse_version short-circuit | 4 | chenyexuan |
+| 11 | T3 chunk 2 tenant_scope_key org-prefix | 5B | chenyexuan |
+| 12 | fulltext additional backends extension | 5A | Bryce |
+| 13 | chunk 4a utc_now vs CURRENT_TIMESTAMP unify | 5B | chenyexuan |
+| 14 | chunk 4b _no_op_extractor identity check robust | 5A | Bryce |
+| 15 | W5-perf-graph-lineage parallel-list O(N) | 5A | Bryce |
+| 16 | W5-otlp-config-cross-check + Neo4j label namespace + Cypher type rename | 5A / 5B | Bryce / chenyexuan |
+
+#### K.9.2. Wave 5 architect direct ratify lane
+
+per Wave 4 lane lock (chunk 4 / T1 / T5 / T7 ratify trail), Wave 5 architect direct ratify scope:
+
+- **Phase 1**: legacy graphindex elimination caller migration completeness verify (grep-zero post-Phase-1 + `aperag/indexing/llm.py` relocate clean)
+- **Phase 2**: T7 §G.2.5.1 spec ↔ implementation align (3-item bundle full impl) + chunk 4b vision gate self-disable verify
+- **Phase 3**: Layer 2 e2e fixture wiring canonical activation
+- **Phase 4 / 5A / 5B**: 走 huangheng pass-1 lane only; architect spot-check on architectural soundness。如出现 spec/state-binding drift → architect direct ratify trigger (per Wave 4 T5 §H.5.1 fix-forward 教训)
+
+#### K.9.3. Pre-check pattern lock (per `feedback_spec_lock_grep_verify_caller.md` 双 pattern)
+
+- **Pattern 1** (caller cascade): Phase 1 必跑 — grep `from aperag.domains.knowledge_graph.graphindex` 全 caller list verify migration scope
+- **Pattern 2** (state binding): 任何新 `collection.config.*` field / `settings.*` setting / logical db / port / path lock — 必跑 grep `from settings import` / `getattr(cfg,...)` 现 code binding verify
+
+#### K.9.4. Layer 1 same-session continuation directive (per `feedback_no_refresh_complete_all_tasks.md`)
+
+Wave 5 implementation 同 Wave 4 directive:
+- earayu2 不再 帮 fresh-restart agent session
+- 各 implementer same-session continuation push 节奏 maintained
+- Layer 1 metric goal: ≥ 90% same-session ratio (Wave 4 verified 65% same-session + 100% pass-1 一次过率)
+- chunked-rotation within PR (multiple commits in same branch / same session)
+
 ### K.7. 测试策略
 
 - **Unit tests**: 每个 modality 配幂等自测（Wave 1 gate）
diff --git a/tests/integration/test_graph_extractor.py b/tests/integration/test_graph_extractor.py
index 69a017a1c..536cb32ea 100644
--- a/tests/integration/test_graph_extractor.py
+++ b/tests/integration/test_graph_extractor.py
@@ -213,7 +213,7 @@ async def _flaky_llm(_prompt: str) -> str:
 
     # Stub the legacy integration so the extractor builder doesn't try
     # to look up a real model provider.
-    import aperag.domains.knowledge_graph.graphindex.integration as _integration
+    import aperag.indexing.llm as _integration
 
     monkeypatch.setattr(
         _integration,
@@ -242,7 +242,7 @@ def test_extractor_builder_raises_when_completion_model_missing(monkeypatch: pyt
     builder wraps the failure in :class:`WorkerFactoryError` so the
     orchestrator can finalise the row FAILED with a clear message."""
 
-    import aperag.domains.knowledge_graph.graphindex.integration as _integration
+    import aperag.indexing.llm as _integration
 
     def _no_llm(_coll):
         raise ValueError("no completion model configured for collection col-x")

From 07d51cc41a5dd51017a7383773848b668106b97e Mon Sep 17 00:00:00 2001
From: Bryce <bryce@apecloud.com>
Date: Mon, 27 Apr 2026 18:09:41 +0800
Subject: [PATCH 02/13] refactor(celery Wave 5 P1 chunk 2): relocate
 ENTITY_RELATION_EXTRACTION import to aperag/indexing/llm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 task #26 chunk 2 per §K.9.1 acceptance item 3 cascade
(legacy graphindex caller migration):

`aperag/service/prompt_template_service.py:155-163` (the
`get_default_prompt(prompt_type="graph")` branch) was importing the
ENTITY_RELATION_EXTRACTION template from the legacy
`aperag.domains.knowledge_graph.graphindex.prompts` module. Per chunk 1
relocate (`11113acb`), the canonical home for the template is now
`aperag.indexing.llm`. This commit migrates the import site so the
caller no longer transitively touches the legacy module.

Behavior unchanged — the legacy `graphindex/prompts.py` shim already
re-exports the template from the new location, so this is a pure
import-path refactor that leaves the runtime payload identical.

Local gates:
  - `ruff check ./aperag ./tests` clean
  - `ruff format --check ./aperag ./tests` clean (509 files)

Phase 1 cascade progress:
- chunk 1 ✅ (`11113acb`): §K.9 spec + `aperag/indexing/llm.py` relocate
- **chunk 2** (this commit): `service/prompt_template_service.py` import relocate
- chunk 3+ pending: `retrieval/pipeline.py` / `knowledge_graph/service.py` /
  `graph_curation/*` migration (these need new design — legacy
  24-method `GraphStore` Protocol → new 10-method `LineageGraphStore`
  Protocol API surface is NOT a 1-to-1 replacement; per Bryce
  msg=30c7e994 scope reality check + architect msg=b052b1b4 cascade
  ratify lane lock)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/service/prompt_template_service.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/aperag/service/prompt_template_service.py b/aperag/service/prompt_template_service.py
index 6de3ab0bc..af380bbae 100644
--- a/aperag/service/prompt_template_service.py
+++ b/aperag/service/prompt_template_service.py
@@ -153,12 +153,13 @@ def get_hardcoded_index_prompt(prompt_type: str) -> Optional[str]:
         Hardcoded prompt content, or None if not available
     """
     if prompt_type == "graph":
-        # Return the graphindex v2 extraction prompt template. Unlike
-        # LightRAG's parameterised ``entity_extraction``, this one
-        # expects ``{input_text}`` / ``{entity_types}`` / ``{language}``
-        # / ``{max_entities}`` / ``{max_relations}`` to be filled by the
-        # caller (see ``aperag.domains.knowledge_graph.graphindex.prompts.render_extraction_prompt``).
-        from aperag.domains.knowledge_graph.graphindex.prompts import ENTITY_RELATION_EXTRACTION
+        # Return the canonical entity/relation extraction prompt template.
+        # Wave 5 P1 chunk 1 relocate (per §K.9.1 item 3): the canonical
+        # home is now :mod:`aperag.indexing.llm`. The template expects
+        # ``{input_text}`` / ``{entity_types}`` / ``{language}`` /
+        # ``{max_entities}`` / ``{max_relations}`` to be filled by the
+        # caller (see :func:`aperag.indexing.llm.render_extraction_prompt`).
+        from aperag.indexing.llm import ENTITY_RELATION_EXTRACTION
 
         return ENTITY_RELATION_EXTRACTION
     elif prompt_type == "summary":

From 0fd01e8356626bb1e07f8a04fae8cd88f6f289c8 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:08:40 +0800
Subject: [PATCH 03/13] =?UTF-8?q?feat(celery=20Wave=205=20P4):=20productio?=
 =?UTF-8?q?n=20robustness=203-pack=20=E2=80=94=20cleanup=20transient/inten?=
 =?UTF-8?q?tional=20split=20+=20parse=5Fversion=20short-circuit=20+=20reco?=
 =?UTF-8?q?nciler=20stuck-parse=20re-enqueue?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 Phase 4 (PR-C / task #29) — closes 3 latent issues surfaced
through Wave 4 ratify trail:

* **P4-1 cleanup transient-vs-intentional split**
  (per architect msg=6aa8ca88 T2 ratify minor obs A):
  Pre-Wave-5 ``_resolve_cleanup_worker`` collapsed any factory
  exception into "drop the DB row". Transient infra errors
  (Qdrant blip, ES network glitch) lost the retry signal — DB row
  was deleted before next cycle could retry. Wave 5 P4 returns a
  new ``CleanupWorkerResolution(worker, transient)`` distinguishing
  the two: ``WorkerFactoryError`` is a by-design gate (drop the
  row to bound index growth), any other Exception is transient
  (skip DB row drop so next cycle retries when backend recovers).
  Counts surface ``transient_deferred`` so operators can track
  recovery rate.

* **P4-2 parse_version short-circuit**
  (per huangheng T3 chunk 2 obs B + Wave 5 backlog item):
  Pre-Wave-5 ``parse_document`` always re-runs DocParser even when
  the resulting artifact directory is byte-identical (parse_version
  is content-derived). Wave 5 P4 adds ``short_circuit_if_artifacts_exist=True``
  default — if all three derived artifacts (markdown.md /
  outline.json / chunks.jsonl) already exist under the canonical
  ``derived/parse_<version>/`` path, skip DocParser + writes
  entirely. Eliminates the ~30s OCR / Word rerun cost on rebuild
  of unchanged content. Tests can pass ``False`` to force re-parse.

* **P4-3 reconciler stuck-document parse re-enqueue**
  (per architect Wave 4 T3 chunk 2 obs A — production gap close):
  Pre-Wave-5 parse failures (DocParser raise / source missing)
  silently dropped the document_id in the parse worker; operator
  saw ``document.status == PENDING`` forever with no signal to
  re-trigger. Wave 5 P4 adds
  ``reconcile_stuck_documents_for_parse_reenqueue`` to the
  reconciler loop. Detects documents with
  ``Document.gmt_created < now - cooldown_seconds`` AND zero
  ``document_index`` rows AND ``Document.gmt_updated < now -
  cooldown_seconds`` (cooldown filter prevents 30-s tick storm),
  then pushes a fresh ``ParseDispatchPayload`` matching the upload
  handler's contract. ``gmt_updated`` bumps after each push so
  the cooldown predicate rate-limits re-enqueue.

Three-class tag (Wave 3 production-readiness invariant):
* must-be-real: cleanup transient/intentional split prevents
  silent retry-signal loss; parse_version short-circuit eliminates
  OCR rerun waste; stuck-parse reconciler closes the document.status
  PENDING-forever gap
* may-be-gated: ``short_circuit_if_artifacts_exist=False`` for
  tests pinning DocParser invocation count
* fully-resolves: 3 of Wave 5 P1 backlog items (per architect
  ratify trail msg=6aa8ca88 + huangheng obs trail)

Deltas:
* ``aperag/indexing/cleanup.py`` — ``CleanupWorkerResolution``
  dataclass + WorkerFactoryError-vs-Exception split in
  ``_resolve_cleanup_worker``; both ``cleanup_orphan_parse_versions``
  + ``cleanup_for_deleted_documents`` honor ``resolution.transient``
  to skip DB row drop on transient infra failures; collection
  cascade aggregates ``transient_deferred``.
* ``aperag/indexing/parser.py`` — ``_all_artifacts_present``
  predicate + ``short_circuit_if_artifacts_exist`` parameter +
  early-return ParseResult when all 3 artifacts present.
* ``aperag/indexing/reconciler.py`` — ``STUCK_PARSE_COOLDOWN_SECONDS``
  + ``reconcile_stuck_documents_for_parse_reenqueue`` async scan +
  ``_select_stuck_documents_for_reenqueue`` SQL query +
  ``_build_parse_payload_for_document`` payload reconstruction +
  ``_resolve_collection_parser_config`` / ``_resolve_collection_modalities``
  (mirror ``document_service`` shape) + ``_mark_stuck_documents_reenqueued``
  bumps gmt_updated; ``run_reconcile_loop`` calls the new scan.
* ``aperag/indexing/__init__.py`` — re-export new symbols.
* ``tests/integration/test_p4_robustness_3pack.py`` (new) — 13
  tests across 3 layers (cleanup transient/intentional split / parse
  short-circuit / reconciler re-enqueue with cooldown + payload
  shape + skip-when-indexed + skip-when-no-object-path).
* ``tests/unit_test/indexing/test_t3_1_dispatcher_path_c.py`` —
  added ``transient_deferred: 0`` to expected empty-counts dict.

Local gates:
* ``pytest tests/unit_test/indexing/ tests/integration/`` — 232
  passed / 48 skipped (incl. 13 new ``test_p4_robustness_3pack``).
* ``ruff check aperag/ tests/integration/test_p4_robustness_3pack.py``
  — clean.
* ``ruff format`` — applied.

Branch: local ``chenyexuan/celery-wave5-p4`` based on main
``19d3d70`` (Wave 4 PR #1731 squash); will rebase onto
``bryce/celery-wave5`` once Bryce opens the Wave 5 draft PR.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/indexing/__init__.py                   |   6 +-
 aperag/indexing/cleanup.py                    | 116 +++-
 aperag/indexing/parser.py                     |  98 ++-
 aperag/indexing/reconciler.py                 | 265 +++++++-
 tests/integration/test_p4_robustness_3pack.py | 577 ++++++++++++++++++
 .../indexing/test_t3_1_dispatcher_path_c.py   |   1 +
 6 files changed, 1014 insertions(+), 49 deletions(-)
 create mode 100644 tests/integration/test_p4_robustness_3pack.py

diff --git a/aperag/indexing/__init__.py b/aperag/indexing/__init__.py
index 25526f05c..6e36952e9 100644
--- a/aperag/indexing/__init__.py
+++ b/aperag/indexing/__init__.py
@@ -158,9 +158,11 @@
     HEARTBEAT_STALE_SECONDS,
     RECONCILE_BATCH_SIZE,
     RECONCILE_INTERVAL_SECONDS,
+    STUCK_PARSE_COOLDOWN_SECONDS,
     reconcile_failed_retry,
     reconcile_pending_dispatch,
     reconcile_running_reclaim,
+    reconcile_stuck_documents_for_parse_reenqueue,
     run_reconcile_loop,
 )
 from aperag.indexing.summary import (
@@ -276,13 +278,15 @@
     "process_one_parse_task",
     "run_parse_worker",
     "run_parse_worker_loop",
-    # Reconciler (T2.1)
+    # Reconciler (T2.1 + Wave 5 P4 stuck-parse re-enqueue)
     "RECONCILE_INTERVAL_SECONDS",
     "RECONCILE_BATCH_SIZE",
     "HEARTBEAT_STALE_SECONDS",
+    "STUCK_PARSE_COOLDOWN_SECONDS",
     "reconcile_pending_dispatch",
     "reconcile_failed_retry",
     "reconcile_running_reclaim",
+    "reconcile_stuck_documents_for_parse_reenqueue",
     "run_reconcile_loop",
     # Cleanup (T2.1 + T3.1 path C)
     "CLEANUP_INTERVAL_SECONDS",
diff --git a/aperag/indexing/cleanup.py b/aperag/indexing/cleanup.py
index 87ab2c588..59b1f1216 100644
--- a/aperag/indexing/cleanup.py
+++ b/aperag/indexing/cleanup.py
@@ -84,6 +84,7 @@
 
 import asyncio
 import logging
+from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, Awaitable, Callable, Mapping, Optional
 
@@ -92,6 +93,7 @@
 
 from aperag.indexing.base import ModalityWorker
 from aperag.indexing.models import DocumentIndex, Modality
+from aperag.indexing.worker_factory import WorkerFactoryError
 
 logger = logging.getLogger(__name__)
 
@@ -164,45 +166,92 @@ def _is_graph_worker(worker: ModalityWorker) -> bool:
     return getattr(worker, "modality", None) is Modality.GRAPH
 
 
+@dataclass(frozen=True)
+class CleanupWorkerResolution:
+    """Wave 5 P4 T2 — outcome of :func:`_resolve_cleanup_worker`.
+
+    Distinguishes the two failure modes that pre-Wave-5 collapsed into
+    a single ``None`` return:
+
+    * **intentional gate** (``worker is None`` AND ``transient is False``)
+      — :class:`WorkerFactoryError` raised because the modality is
+      Wave-N-gated by design (graph extractor not wired / vision
+      multimodal not configured), the operator deliberately disabled
+      the modality via ``collection.config``, or the row's modality
+      string is unknown. The cleanup loop should drop the DB row so
+      the index does not grow unboundedly while the gate is active.
+
+    * **transient infrastructure error** (``worker is None`` AND
+      ``transient is True``) — DB connection blip / Qdrant
+      unreachable / ES unhealthy / Redis network glitch. The cleanup
+      loop must NOT drop the DB row — the next cycle (5 min later)
+      retries automatically once the backend recovers. Pre-Wave-5
+      this collapsed into the gate path and silently lost the retry
+      signal.
+
+    * **resolved worker** (``worker is not None``, ``transient ignored``)
+      — happy path; caller proceeds with backend cleanup.
+    """
+
+    worker: Optional[ModalityWorker]
+    transient: bool
+
+
 async def _resolve_cleanup_worker(
     *,
     workers: Optional[Mapping[Modality, ModalityWorker]],
     worker_factory: Optional[WorkerFactoryForRow],
     row: DocumentIndex,
-) -> Optional[ModalityWorker]:
+) -> CleanupWorkerResolution:
     """Resolve the cleanup worker for a row from factory or static map.
 
-    Wave 4 T2: production wires the factory (per-(collection, modality)
-    lazy materialisation); tests typically pass a pre-built ``workers``
-    mapping. When both are provided the factory wins — production
-    deployments override the legacy mapping with the per-row factory.
-
-    Returns ``None`` when neither source can supply a worker; the
-    caller logs + counts the row as ``backend_skipped``. A factory
-    that raises :class:`WorkerFactoryError` (Wave 4 vision multimodal
-    gate / partial config) also returns ``None`` so the cleanup cycle
-    drops the DB row even when the backend is unreachable, matching
-    the existing "skip backend, drop row" semantics.
+    Wave 4 T2 + Wave 5 P4 (transient-vs-intentional split): production
+    wires the factory (per-(collection, modality) lazy materialisation);
+    tests typically pass a pre-built ``workers`` mapping. When both
+    are provided the factory wins — production deployments override
+    the legacy mapping with the per-row factory.
+
+    Returns a :class:`CleanupWorkerResolution`:
+
+    * ``worker is not None``: backend cleanup proceeds.
+    * ``worker is None, transient=False``: intentional gate / unknown
+      modality / no source — caller drops DB row.
+    * ``worker is None, transient=True``: transient infrastructure
+      error — caller skips DB row drop so next cycle retries.
+
+    Pre-Wave-5 this returned ``None`` for both failure modes,
+    causing transient errors to silently lose their retry signal.
     """
     if worker_factory is not None:
         try:
-            return await worker_factory(row)
-        except Exception as exc:  # noqa: BLE001 — surface via log, count as skip
+            return CleanupWorkerResolution(worker=await worker_factory(row), transient=False)
+        except WorkerFactoryError as exc:
             logger.warning(
-                "cleanup worker_factory failed modality=%s row id=%d collection=%s: %s — counting as backend_skipped",
+                "cleanup worker_factory gate raised modality=%s row id=%d collection=%s: %s — "
+                "counting as backend_skipped (intentional gate, dropping DB row)",
                 row.modality,
                 row.id,
                 row.collection_id,
                 exc,
             )
-            return None
+            return CleanupWorkerResolution(worker=None, transient=False)
+        except Exception as exc:  # noqa: BLE001 — transient infra error, retry next cycle
+            logger.warning(
+                "cleanup worker_factory transient failure modality=%s row id=%d collection=%s: %s — "
+                "skipping DB row drop, will retry next cycle",
+                row.modality,
+                row.id,
+                row.collection_id,
+                exc,
+            )
+            return CleanupWorkerResolution(worker=None, transient=True)
     if workers is None:
-        return None
+        return CleanupWorkerResolution(worker=None, transient=False)
     try:
         modality = Modality(row.modality)
     except ValueError:
-        return None
-    return workers.get(modality)
+        return CleanupWorkerResolution(worker=None, transient=False)
+    return CleanupWorkerResolution(worker=workers.get(modality), transient=False)
 
 
 # ---------------------------------------------------------------------
@@ -301,6 +350,7 @@ async def cleanup_orphan_parse_versions(
         "rows_deleted": 0,
         "graph_noop": 0,
         "backend_skipped": 0,
+        "transient_deferred": 0,
     }
 
     delete_ids: list[int] = []
@@ -315,11 +365,18 @@ async def cleanup_orphan_parse_versions(
             )
             continue
 
-        worker = await _resolve_cleanup_worker(
+        resolution = await _resolve_cleanup_worker(
             workers=workers,
             worker_factory=worker_factory,
             row=row,
         )
+        if resolution.transient:
+            # Wave 5 P4: transient infra error — skip both backend
+            # delete AND DB row drop so the next cleanup cycle (5 min
+            # later) retries automatically once the backend recovers.
+            counts["transient_deferred"] += 1
+            continue
+        worker = resolution.worker
         if worker is None:
             logger.warning(
                 "cleanup no worker registered for modality=%s row id=%d — skipping backend delete",
@@ -424,6 +481,7 @@ async def cleanup_for_deleted_documents(
         "graph_lineage_cleaned": 0,
         "rows_deleted": 0,
         "backend_skipped": 0,
+        "transient_deferred": 0,
     }
 
     # Per-document, per-modality dedup so graph lineage cleanup runs
@@ -444,11 +502,18 @@ async def cleanup_for_deleted_documents(
             )
             continue
 
-        worker = await _resolve_cleanup_worker(
+        resolution = await _resolve_cleanup_worker(
             workers=workers,
             worker_factory=worker_factory,
             row=row,
         )
+        if resolution.transient:
+            # Wave 5 P4: transient infra error — skip backend delete
+            # AND DB row drop so the caller can retry on a later cycle
+            # / re-invocation once the backend recovers.
+            counts["transient_deferred"] += 1
+            continue
+        worker = resolution.worker
         if worker is None:
             logger.warning(
                 "cleanup no worker for modality=%s row id=%d document=%s — skipping backend",
@@ -623,6 +688,7 @@ async def cleanup_for_deleted_collections(
         "graph_lineage_cleaned": 0,
         "rows_deleted": 0,
         "backend_skipped": 0,
+        "transient_deferred": 0,
         "collections_cleaned": 0,
     }
     if not collection_ids:
@@ -641,7 +707,13 @@ async def cleanup_for_deleted_collections(
             worker_factory=worker_factory,
             document_ids=document_ids,
         )
-        for key in ("backend_deleted", "graph_lineage_cleaned", "rows_deleted", "backend_skipped"):
+        for key in (
+            "backend_deleted",
+            "graph_lineage_cleaned",
+            "rows_deleted",
+            "backend_skipped",
+            "transient_deferred",
+        ):
             counts[key] += sub_counts[key]
 
     # Sweep any rows that path B did not catch (no document_id match
diff --git a/aperag/indexing/parser.py b/aperag/indexing/parser.py
index ddfdee052..f62d8edac 100644
--- a/aperag/indexing/parser.py
+++ b/aperag/indexing/parser.py
@@ -329,6 +329,29 @@ def _document_md5(source_bytes: bytes) -> str:
     return hashlib.md5(source_bytes).hexdigest()
 
 
+def _all_artifacts_present(
+    *,
+    store: _SyncObjectStore,
+    markdown_path: str,
+    outline_path: str,
+    chunks_path: str,
+) -> bool:
+    """Wave 5 P4 short-circuit predicate: all three canonical
+    derived artifacts must exist for the cached parse to be valid.
+
+    Uses ``ObjectStore.obj_exists`` (cheap metadata check) rather
+    than ``read_or_none`` so the predicate stays cost-bounded for
+    every parse call. ``chunks.jsonl`` is checked last because it is
+    the only artifact downstream modality workers actually read; if
+    it is missing the previous parse was interrupted mid-write and
+    re-parsing is required regardless.
+    """
+    try:
+        return store.obj_exists(markdown_path) and store.obj_exists(outline_path) and store.obj_exists(chunks_path)
+    except Exception:  # noqa: BLE001 — predicate fails closed (re-parse)
+        return False
+
+
 def _docparser_extract_markdown(
     *,
     source_bytes: bytes,
@@ -398,6 +421,7 @@ def parse_document(
     source_filename: str | None = None,
     parser_config: dict[str, Any] | None = None,
     config: ParseConfig | None = None,
+    short_circuit_if_artifacts_exist: bool = True,
 ) -> ParseResult:
     """Parse the source bytes and persist the three shared artifacts.
 
@@ -406,6 +430,16 @@ def parse_document(
     them atomically). The artifact paths are the canonical
     ``derived/parse_<version>/`` layout per design pack §C.1.
 
+    Wave 5 P4 short-circuit: when ``short_circuit_if_artifacts_exist``
+    is True (default) and all three derived artifacts (``markdown.md``
+    / ``outline.json`` / ``chunks.jsonl``) already exist in the object
+    store under the canonical ``derived/parse_<version>/`` path, the
+    parser **skips DocParser + writes entirely** and returns the
+    existing :class:`ParseResult`. This eliminates the ~30s OCR / Word
+    rerun cost when a document is re-uploaded with identical content
+    or a rebuild is dispatched against an already-parsed version
+    (per huangheng T3 chunk 2 obs B + architect Wave 5 P4 lock).
+
     Dispatch (Wave 4 T3 chunk 1):
     - ``source_filename`` ends in a known text extension (``.md`` /
       ``.markdown`` / ``.txt`` / ``.text``) **or** is ``None`` →
@@ -433,6 +467,12 @@ def parse_document(
             to the real parser chain. Ignored on the simulator path.
         config: Parsing knobs that influence the parse_version. Pass
             ``None`` to use simulator defaults.
+        short_circuit_if_artifacts_exist: When True (default), reuse
+            existing canonical artifacts if all three are already in
+            the object store under the resolved
+            ``derived/parse_<version>/`` path. Pass ``False`` to force
+            a re-parse + re-write (used by tests pinning DocParser
+            invocation count).
     """
     from aperag.mcp.tools.parse_version import compute_parse_version
 
@@ -446,6 +486,45 @@ def parse_document(
         chunking_config=cfg.chunking.serialize(),
     )
 
+    markdown_path = derived_artifact(
+        collection_id=collection_id,
+        document_id=document_id,
+        parse_version=parse_version,
+        filename="markdown.md",
+    )
+    outline_path = derived_artifact(
+        collection_id=collection_id,
+        document_id=document_id,
+        parse_version=parse_version,
+        filename="outline.json",
+    )
+    chunks_path = derived_artifact(
+        collection_id=collection_id,
+        document_id=document_id,
+        parse_version=parse_version,
+        filename="chunks.jsonl",
+    )
+
+    if short_circuit_if_artifacts_exist and _all_artifacts_present(
+        store=store,
+        markdown_path=markdown_path,
+        outline_path=outline_path,
+        chunks_path=chunks_path,
+    ):
+        logger.info(
+            "indexing parser short-circuit collection=%s document=%s parse_version=%s "
+            "(all derived artifacts already present; skipping DocParser + writes)",
+            collection_id,
+            document_id,
+            parse_version,
+        )
+        return ParseResult(
+            parse_version=parse_version,
+            markdown_path=markdown_path,
+            outline_path=outline_path,
+            chunks_path=chunks_path,
+        )
+
     if extension is None or extension in _SIMULATOR_EXTENSIONS:
         try:
             markdown = source_bytes.decode("utf-8")
@@ -470,25 +549,6 @@ def parse_document(
     outline = _build_outline(markdown)
     chunks = _split_chunks(markdown, cfg.chunking)
 
-    markdown_path = derived_artifact(
-        collection_id=collection_id,
-        document_id=document_id,
-        parse_version=parse_version,
-        filename="markdown.md",
-    )
-    outline_path = derived_artifact(
-        collection_id=collection_id,
-        document_id=document_id,
-        parse_version=parse_version,
-        filename="outline.json",
-    )
-    chunks_path = derived_artifact(
-        collection_id=collection_id,
-        document_id=document_id,
-        parse_version=parse_version,
-        filename="chunks.jsonl",
-    )
-
     write_atomic(store, markdown_path, markdown.encode("utf-8"))
     write_atomic(
         store,
diff --git a/aperag/indexing/reconciler.py b/aperag/indexing/reconciler.py
index affa1aebd..997abc8f0 100644
--- a/aperag/indexing/reconciler.py
+++ b/aperag/indexing/reconciler.py
@@ -80,6 +80,14 @@
 # cycle will pick up the rest.
 RECONCILE_BATCH_SIZE = 100
 
+# Wave 5 P4: cooldown before a document with zero ``document_index``
+# rows is considered "stuck after upload" and re-enqueued onto
+# ``q:parse``. Set to 5 minutes so a normal parse run (PDF ~30s,
+# Office ~60s, OCR ~3min) never trips it. Operators can shorten the
+# cooldown via ``stuck_parse_cooldown_seconds`` for tighter recovery
+# on small-document workloads.
+STUCK_PARSE_COOLDOWN_SECONDS = 300
+
 
 def _utcnow() -> datetime:
     return datetime.now(timezone.utc)
@@ -255,11 +263,6 @@ def reconcile_running_reclaim(
 # canonical 3-statement TX.
 
 
-# ---------------------------------------------------------------------
-# Run loop — production entrypoint.
-# ---------------------------------------------------------------------
-
-
 # ---------------------------------------------------------------------
 # Pattern B periodic hook — collection summary reconciliation.
 # ---------------------------------------------------------------------
@@ -417,6 +420,247 @@ def _reclaim_stale_and_claim_pending() -> list[tuple[str, str, int, str]]:
         logger.info("collection-summary reconciler dispatched=%d", len(dispatches))
 
 
+# ---------------------------------------------------------------------
+# Wave 5 P4 — stuck-document parse re-enqueue (closes T3 chunk 2 obs A
+# silent-drop loop).
+# ---------------------------------------------------------------------
+#
+# T3 chunk 2 q:parse async promotion logs + drops parse failures
+# (DocParser raise / source missing) without persisting any
+# ``document_index`` row. Pre-Wave-5 this surfaced as ``document.status
+# == PENDING`` forever — the operator had no signal to re-trigger.
+# This reconciler scan closes the loop by detecting documents that
+# uploaded > N minutes ago but never sprouted a ``document_index``
+# row, then re-enqueueing a parse job.
+#
+# Architectural note: the scan is **at-most-once-per-cooldown**, not
+# per cycle: every re-enqueue advances ``Document.gmt_updated`` so
+# consecutive cycles do not multi-push. Without that throttle a
+# stuck document would re-queue every 30s, drowning the parse
+# worker pool.
+
+
+async def reconcile_stuck_documents_for_parse_reenqueue(
+    *,
+    engine: Engine,
+    queue: WorkQueue,
+    cooldown_seconds: int = STUCK_PARSE_COOLDOWN_SECONDS,
+    batch_size: int = RECONCILE_BATCH_SIZE,
+) -> int:
+    """Re-enqueue ``q:parse`` for documents stuck without ``document_index`` rows.
+
+    A document is "stuck" when:
+
+    * ``Document.gmt_deleted IS NULL`` (still active)
+    * ``Document.gmt_created < now - cooldown_seconds`` (uploaded long
+      enough ago that a normal parse should have completed and
+      dispatched)
+    * **zero** ``document_index`` rows reference its ``document_id``
+      (parse never reached :func:`dispatch_indexing` — the parse worker
+      either died, the source was missing, or DocParser raised)
+    * ``Document.gmt_updated < now - cooldown_seconds`` (haven't
+      already been re-enqueued this cooldown window — prevents the
+      30-s reconciler tick from drowning ``q:parse`` with the same
+      stuck doc on every cycle)
+
+    For each stuck document the function pushes a fresh
+    :class:`ParseDispatchPayload` matching the upload handler's
+    contract (per ``document_service._create_or_update_document_indexes``).
+    The parse worker handles retries via its existing log-and-drop
+    semantics; this scan is the **producer-side recovery loop** that
+    closes T3 chunk 2 obs A.
+
+    Returns the number of stuck documents re-enqueued. Failure to
+    reach Redis logs + bumps to next cycle (queue may be transiently
+    unhealthy; we never crash the loop).
+    """
+    payloads = await asyncio.to_thread(
+        _select_stuck_documents_for_reenqueue,
+        engine,
+        cooldown_seconds,
+        batch_size,
+    )
+    if not payloads:
+        return 0
+
+    pushed_ids: list[str] = []
+    for payload_dict in payloads:
+        try:
+            await queue.push_parse(payload=payload_dict)
+        except Exception as exc:  # noqa: BLE001 — transient Redis fault, retry next cycle
+            logger.warning(
+                "stuck-document parse re-enqueue failed for document=%s: %s — will retry next cycle",
+                payload_dict.get("document_id", "<unknown>"),
+                exc,
+            )
+            continue
+        pushed_ids.append(str(payload_dict["document_id"]))
+
+    if pushed_ids:
+        await asyncio.to_thread(_mark_stuck_documents_reenqueued, engine, pushed_ids)
+        logger.info(
+            "reconciler stuck-parse: re-enqueued %d documents to q:parse",
+            len(pushed_ids),
+        )
+    return len(pushed_ids)
+
+
+def _select_stuck_documents_for_reenqueue(
+    engine: Engine,
+    cooldown_seconds: int,
+    batch_size: int,
+) -> list[dict[str, object]]:
+    """Sync DB scan for stuck documents. Returns a list of fully-formed
+    parse payload dicts ready for ``queue.push_parse``.
+
+    Runs through ``asyncio.to_thread`` from the async caller so the
+    SQLAlchemy / collection-config decoding cost does not block the
+    event loop.
+    """
+    from aperag.domains.knowledge_base.db.models import Document
+
+    threshold = _utcnow() - timedelta(seconds=cooldown_seconds)
+    stuck_dicts: list[dict[str, object]] = []
+    with Session(engine) as session:
+        rows_for_doc = select(DocumentIndex.document_id).where(DocumentIndex.document_id == Document.id).exists()
+        stmt = (
+            select(Document)
+            .where(
+                and_(
+                    Document.gmt_deleted.is_(None),
+                    Document.gmt_created < threshold,
+                    Document.gmt_updated < threshold,
+                    ~rows_for_doc,
+                )
+            )
+            .order_by(Document.gmt_created)
+            .limit(batch_size)
+        )
+        for document in session.scalars(stmt):
+            payload = _build_parse_payload_for_document(session, document)
+            if payload is None:
+                continue
+            stuck_dicts.append(payload.to_dict())
+    return stuck_dicts
+
+
+def _build_parse_payload_for_document(session: Session, document):
+    """Reconstruct the parse payload the upload handler would have
+    pushed. Mirrors
+    :func:`aperag.domains.knowledge_base.service.document_service._create_or_update_document_indexes`
+    so the parse worker sees identical data shape regardless of which
+    producer enqueued the job. Returns ``None`` to signal an
+    unrecoverable document (missing object_path / no enabled modalities).
+    """
+    import json as _json
+
+    from aperag.domains.knowledge_base.db.models import Collection
+    from aperag.indexing.parse_orchestrator import ParseDispatchPayload
+
+    metadata: dict[str, object] = {}
+    if document.doc_metadata:
+        try:
+            metadata = _json.loads(document.doc_metadata) or {}
+        except (TypeError, ValueError):
+            metadata = {}
+    object_path = metadata.get("object_path")
+    if not object_path:
+        logger.warning(
+            "stuck-document parse re-enqueue: skipping document=%s — doc_metadata.object_path missing",
+            document.id,
+        )
+        return None
+
+    collection = session.get(Collection, document.collection_id) if document.collection_id else None
+    parser_config = _resolve_collection_parser_config(collection)
+    modalities = _resolve_collection_modalities(collection)
+    if not modalities:
+        logger.warning(
+            "stuck-document parse re-enqueue: skipping document=%s — collection has no enabled modalities",
+            document.id,
+        )
+        return None
+
+    return ParseDispatchPayload(
+        document_id=document.id,
+        collection_id=document.collection_id or "",
+        object_path=str(object_path),
+        tenant_scope_key=f"user:{document.user}",
+        modalities=tuple(modalities),
+        parser_config=parser_config,
+        purge_existing_triples=True,
+    )
+
+
+def _resolve_collection_parser_config(collection) -> dict | None:
+    """Mirror ``document_service._resolve_parser_config_for_collection``
+    in sync context. Defensive on every shape (None / non-JSON /
+    non-dict).
+    """
+    import json as _json
+
+    if collection is None or not collection.config:
+        return None
+    try:
+        config_dict = _json.loads(collection.config)
+    except (TypeError, ValueError):
+        return None
+    if not isinstance(config_dict, dict):
+        return None
+    parser_config = config_dict.get("parser_config")
+    if parser_config is None or not isinstance(parser_config, dict):
+        return None
+    return parser_config
+
+
+def _resolve_collection_modalities(collection) -> list[str]:
+    """Walk ``collection.config.{enable_vector, enable_fulltext,
+    enable_knowledge_graph, enable_summary, enable_vision}`` to derive
+    the modality list. Default to vector + fulltext + summary (the
+    always-Wave-3-real subset) when the collection's config is
+    missing or malformed.
+    """
+    import json as _json
+
+    defaults = ["vector", "fulltext", "summary"]
+    if collection is None or not collection.config:
+        return defaults
+    try:
+        cfg = _json.loads(collection.config)
+    except (TypeError, ValueError):
+        return defaults
+    if not isinstance(cfg, dict):
+        return defaults
+
+    modalities: list[str] = []
+    if cfg.get("enable_vector", True):
+        modalities.append("vector")
+    if cfg.get("enable_fulltext", True):
+        modalities.append("fulltext")
+    if cfg.get("enable_knowledge_graph", False):
+        modalities.append("graph")
+    if cfg.get("enable_summary", False):
+        modalities.append("summary")
+    if cfg.get("enable_vision", False):
+        modalities.append("vision")
+    return modalities or defaults
+
+
+def _mark_stuck_documents_reenqueued(engine: Engine, document_ids: list[str]) -> None:
+    """Bump ``Document.gmt_updated`` for the re-enqueued documents so
+    the next reconciler tick does not pick the same documents up
+    again until the next cooldown window. Re-using ``gmt_updated``
+    keeps the scan single-table and avoids an alembic migration for
+    a recovery-loop bookkeeping field.
+    """
+    if not document_ids:
+        return
+    from aperag.domains.knowledge_base.db.models import Document
+
+    with Session(engine) as session, session.begin():
+        session.execute(update(Document).where(Document.id.in_(document_ids)).values(gmt_updated=_utcnow()))
+
+
 # ---------------------------------------------------------------------
 # Run loop — production entrypoint.
 # ---------------------------------------------------------------------
@@ -450,12 +694,17 @@ async def run_reconcile_loop(
                 engine=engine,
                 stale_seconds=stale_seconds,
             )
-            if pushed or retried or reclaimed:
+            stuck_reenqueued = await reconcile_stuck_documents_for_parse_reenqueue(
+                engine=engine,
+                queue=queue,
+            )
+            if pushed or retried or reclaimed or stuck_reenqueued:
                 logger.info(
-                    "reconciler cycle: dispatched=%d retried=%d reclaimed=%d",
+                    "reconciler cycle: dispatched=%d retried=%d reclaimed=%d stuck_reenqueued=%d",
                     pushed,
                     retried,
                     reclaimed,
+                    stuck_reenqueued,
                 )
         except Exception as exc:  # noqa: BLE001 — keep the loop alive
             logger.exception("reconciler cycle failed: %s", exc)
@@ -473,9 +722,11 @@ async def run_reconcile_loop(
     "HEARTBEAT_STALE_SECONDS",
     "RECONCILE_BATCH_SIZE",
     "RECONCILE_INTERVAL_SECONDS",
+    "STUCK_PARSE_COOLDOWN_SECONDS",
     "reconcile_collection_summaries_hook",
     "reconcile_failed_retry",
     "reconcile_pending_dispatch",
     "reconcile_running_reclaim",
+    "reconcile_stuck_documents_for_parse_reenqueue",
     "run_reconcile_loop",
 ]
diff --git a/tests/integration/test_p4_robustness_3pack.py b/tests/integration/test_p4_robustness_3pack.py
new file mode 100644
index 000000000..700b2c681
--- /dev/null
+++ b/tests/integration/test_p4_robustness_3pack.py
@@ -0,0 +1,577 @@
+# Copyright 2025 ApeCloud, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Wave 5 Phase 4 — production robustness 3-pack.
+
+Three latent issues from Wave 4 huangheng + architect ratify obs surface
+silent-drop / wasted-work bugs that this phase locks down with explicit
+behaviour:
+
+1. **T2 cleanup transient-vs-intentional split** — pre-Wave-5
+   ``_resolve_cleanup_worker`` collapsed any factory exception into
+   "drop the DB row". This loses the retry signal when a transient
+   infra error (Qdrant blip, ES network glitch) makes a
+   ``WorkerFactoryError``-look-alike. Wave 5 P4 distinguishes the two
+   so transient errors keep the row for next-cycle retry; intentional
+   gates still drop the row to keep the index from growing.
+
+2. **T3 parse_orchestrator parse_version short-circuit** — pre-Wave-5
+   :func:`parse_document` always re-runs DocParser even when the
+   resulting artifact directory is byte-identical (parse_version is
+   content-derived). Wave 5 P4 short-circuits when all three derived
+   artifacts are already present in the object store.
+
+3. **T2 reconciler stuck-document parse re-enqueue** — pre-Wave-5
+   parse failures (DocParser raise / source missing) silently dropped
+   the document_id; the operator had no recovery path. Wave 5 P4 adds
+   a reconciler scan that re-enqueues ``q:parse`` for documents that
+   uploaded > N min ago without sprouting any ``document_index`` rows.
+
+Tests use SQLAlchemy in-memory SQLite + InMemoryWorkQueue +
+InMemoryObjectStore so the suite stays fast (~ms per test) without
+external dependencies.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime, timedelta, timezone
+
+import pytest
+from sqlalchemy import Engine, create_engine, insert, select
+from sqlalchemy.orm import Session
+from sqlalchemy.pool import StaticPool
+
+from aperag.db.base import Base
+from aperag.indexing import (
+    InMemoryObjectStore,
+    InMemoryWorkQueue,
+    Modality,
+    parse_document,
+    reconcile_stuck_documents_for_parse_reenqueue,
+)
+from aperag.indexing.cleanup import (
+    CleanupWorkerResolution,
+    _resolve_cleanup_worker,
+    cleanup_for_deleted_documents,
+)
+from aperag.indexing.models import DocumentIndex, IndexStatus
+from aperag.indexing.worker_factory import WorkerFactoryError
+
+# -----------------------------------------------------------------------
+# Fixtures — SQLite mirror of the production schema for both
+# DocumentIndex and Document tables (P4-3 needs the Document table).
+# -----------------------------------------------------------------------
+
+
+@pytest.fixture
+def engine() -> Engine:
+    eng = create_engine(
+        "sqlite:///:memory:",
+        connect_args={"check_same_thread": False},
+        poolclass=StaticPool,
+    )
+    # Create only the tables we need for these tests so we don't drag
+    # the whole schema in. Document table is required for the
+    # stuck-parse reconciler scan; DocumentIndex table for the
+    # zero-rows predicate.
+    from aperag.domains.knowledge_base.db.models import Collection, Document
+
+    DocumentIndex.metadata.create_all(eng, tables=[DocumentIndex.__table__])
+    Base.metadata.create_all(eng, tables=[Collection.__table__, Document.__table__])
+    return eng
+
+
+def _utcnow() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+# -----------------------------------------------------------------------
+# Layer 1 — P4-1 cleanup transient-vs-intentional split
+# -----------------------------------------------------------------------
+
+
+def _stub_row(modality: Modality = Modality.VECTOR) -> DocumentIndex:
+    return DocumentIndex(
+        id=1,
+        document_id="doc-stub",
+        parse_version="pv0",
+        modality=modality.value,
+        status=IndexStatus.ACTIVE.value,
+        is_serving=True,
+        tenant_scope_key="user:t",
+        collection_id="col-stub",
+        source_path="x",
+    )
+
+
+def test_p4_resolve_cleanup_worker_intentional_gate_drops_row():
+    """``WorkerFactoryError`` from the factory → ``transient=False``.
+
+    The cleanup loop should drop the DB row so the index does not
+    grow unboundedly while the gate is active (Wave 4 T2 surgical-
+    gate corollary preserved).
+    """
+
+    async def gated_factory(_row: DocumentIndex):
+        raise WorkerFactoryError("Wave 4 wiring T1 not yet implemented")
+
+    async def _run() -> None:
+        result = await _resolve_cleanup_worker(
+            workers=None,
+            worker_factory=gated_factory,
+            row=_stub_row(),
+        )
+        assert result == CleanupWorkerResolution(worker=None, transient=False)
+
+    asyncio.run(_run())
+
+
+def test_p4_resolve_cleanup_worker_transient_error_keeps_row_for_retry():
+    """Non-:class:`WorkerFactoryError` exception → ``transient=True``.
+
+    Operator-visible signal: the next cleanup cycle (5 min later)
+    will retry. Pre-Wave-5 this collapsed into the same path as the
+    intentional gate, silently dropping the row + losing the retry
+    signal.
+    """
+
+    async def transient_factory(_row: DocumentIndex):
+        raise ConnectionError("Qdrant cluster unhealthy")
+
+    async def _run() -> None:
+        result = await _resolve_cleanup_worker(
+            workers=None,
+            worker_factory=transient_factory,
+            row=_stub_row(),
+        )
+        assert result.worker is None
+        assert result.transient is True
+
+    asyncio.run(_run())
+
+
+def test_p4_cleanup_for_deleted_documents_skips_drop_on_transient_error(engine):
+    """Path B — ``transient=True`` resolution skips DB row drop.
+
+    The row stays in ``document_index`` so the next cleanup cycle
+    sees it again. Counts the deferred row under
+    ``transient_deferred`` so operators can track the recovery rate.
+    """
+    pv = "stuckxyz0000000a"[:16]
+    with Session(engine) as session, session.begin():
+        session.execute(
+            insert(DocumentIndex).values(
+                document_id="doc-transient",
+                parse_version=pv,
+                modality=Modality.VECTOR.value,
+                status=IndexStatus.ACTIVE.value,
+                tenant_scope_key="user:t",
+                collection_id="col-transient",
+                source_path=f"collections/col-transient/documents/doc-transient/derived/parse_{pv}/chunks.jsonl",
+                is_serving=True,
+            )
+        )
+
+    async def transient_factory(_row: DocumentIndex):
+        raise ConnectionError("Qdrant cluster unhealthy")
+
+    counts = asyncio.run(
+        cleanup_for_deleted_documents(
+            engine=engine,
+            worker_factory=transient_factory,
+            document_ids=["doc-transient"],
+        )
+    )
+    assert counts["transient_deferred"] == 1
+    assert counts["rows_deleted"] == 0
+    assert counts["backend_skipped"] == 0
+    assert counts["backend_deleted"] == 0
+
+    with Session(engine) as session:
+        rows = list(session.scalars(select(DocumentIndex).where(DocumentIndex.document_id == "doc-transient")))
+    assert len(rows) == 1, "transient infra error must NOT drop the DB row"
+
+
+def test_p4_cleanup_for_deleted_documents_drops_row_on_intentional_gate(engine):
+    """Path B — ``transient=False`` from a :class:`WorkerFactoryError`
+    intentional gate still drops the DB row (the gate is by-design;
+    keeping the row would let the index grow unboundedly).
+    """
+    pv = "gatedxyz000000a"[:15] + "x"
+    with Session(engine) as session, session.begin():
+        session.execute(
+            insert(DocumentIndex).values(
+                document_id="doc-gated",
+                parse_version=pv,
+                modality=Modality.GRAPH.value,
+                status=IndexStatus.ACTIVE.value,
+                tenant_scope_key="user:t",
+                collection_id="col-gated",
+                source_path=f"collections/col-gated/documents/doc-gated/derived/parse_{pv}/chunks.jsonl",
+                is_serving=True,
+            )
+        )
+
+    async def gated_factory(_row: DocumentIndex):
+        raise WorkerFactoryError("Wave 4 wiring T1 not yet")
+
+    counts = asyncio.run(
+        cleanup_for_deleted_documents(
+            engine=engine,
+            worker_factory=gated_factory,
+            document_ids=["doc-gated"],
+        )
+    )
+    assert counts["backend_skipped"] == 1
+    assert counts["transient_deferred"] == 0
+    assert counts["rows_deleted"] == 1
+
+    with Session(engine) as session:
+        rows = list(session.scalars(select(DocumentIndex).where(DocumentIndex.document_id == "doc-gated")))
+    assert rows == [], "intentional gate must drop the DB row to bound index growth"
+
+
+# -----------------------------------------------------------------------
+# Layer 2 — P4-2 parse_version short-circuit
+# -----------------------------------------------------------------------
+
+
+def test_p4_parse_document_short_circuits_when_artifacts_already_present():
+    """Pre-Wave-5 :func:`parse_document` re-runs DocParser even when
+    artifacts are byte-identical. Wave 5 P4 short-circuits if all
+    three canonical artifacts already exist in the store under the
+    canonical ``derived/parse_<version>/`` path.
+    """
+    store = InMemoryObjectStore()
+    body = b"# Cached Doc\n\nFirst paragraph.\n\n## Sub\n\nSecond paragraph.\n"
+
+    # First parse — populates artifacts.
+    first = parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+    )
+    snapshot = dict(store._objects)  # noqa: SLF001 — test introspection
+
+    # Hijack store.put to surface unintended writes during the second call.
+    write_calls = {"count": 0}
+    original_put = store.put
+
+    def tracking_put(path, data):  # noqa: ANN001
+        write_calls["count"] += 1
+        original_put(path, data)
+
+    store.put = tracking_put  # type: ignore[method-assign]
+
+    # Second parse — must short-circuit (no writes, no DocParser).
+    second = parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+    )
+    assert second == first
+    assert write_calls["count"] == 0, "short-circuit must not re-write artifacts"
+    assert dict(store._objects) == snapshot, "artifacts must be byte-identical"
+
+
+def test_p4_parse_document_does_not_short_circuit_when_chunks_missing():
+    """If ``chunks.jsonl`` is missing the previous parse was interrupted;
+    the short-circuit predicate must return False so DocParser re-runs.
+    """
+    store = InMemoryObjectStore()
+    body = b"# Doc\n\nText.\n"
+
+    # First parse — populates all three artifacts.
+    parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+    )
+
+    # Manually delete chunks.jsonl to simulate an interrupted parse.
+    chunks_keys = [k for k in store._objects if k.endswith("chunks.jsonl")]
+    assert chunks_keys, "fixture should have produced chunks.jsonl"
+    for k in chunks_keys:
+        del store._objects[k]  # noqa: SLF001
+
+    # Second parse — must NOT short-circuit; it must re-write chunks.
+    parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+    )
+    chunks_keys = [k for k in store._objects if k.endswith("chunks.jsonl")]
+    assert chunks_keys, "interrupted parse must be recovered, not skipped"
+
+
+def test_p4_parse_document_short_circuit_can_be_disabled():
+    """Tests / debugging callers can pass
+    ``short_circuit_if_artifacts_exist=False`` to force a re-parse.
+    """
+    store = InMemoryObjectStore()
+    body = b"# Doc\n\ntext\n"
+
+    parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+    )
+
+    write_calls = {"count": 0}
+    original_put = store.put
+
+    def tracking_put(path, data):  # noqa: ANN001
+        write_calls["count"] += 1
+        original_put(path, data)
+
+    store.put = tracking_put  # type: ignore[method-assign]
+
+    parse_document(
+        store=store,
+        collection_id="col",
+        document_id="doc",
+        source_bytes=body,
+        short_circuit_if_artifacts_exist=False,
+    )
+    assert write_calls["count"] >= 3, "disabling short-circuit must re-write all artifacts"
+
+
+# -----------------------------------------------------------------------
+# Layer 3 — P4-3 reconciler stuck-document parse re-enqueue
+# -----------------------------------------------------------------------
+
+
+def _insert_document(
+    engine: Engine,
+    *,
+    document_id: str,
+    collection_id: str = "col-stuck",
+    object_path: str | None = "collections/col-stuck/documents/doc/source/upload.pdf",
+    age: timedelta = timedelta(minutes=10),
+    user: str = "u1",
+) -> None:
+    """Insert a Document row aged ``age`` ago. The age controls whether
+    the row is past the cooldown window for re-enqueue.
+    """
+    import json as _json
+
+    from aperag.domains.knowledge_base.db.models import Document
+
+    created = _utcnow() - age
+    doc_metadata = _json.dumps({"object_path": object_path}) if object_path else None
+    with Session(engine) as session, session.begin():
+        session.add(
+            Document(
+                id=document_id,
+                name="doc.pdf",
+                user=user,
+                collection_id=collection_id,
+                status="UPLOADED",
+                size=1234,
+                content_hash="abc",
+                object_path=object_path,
+                doc_metadata=doc_metadata,
+                gmt_created=created,
+                gmt_updated=created,
+            )
+        )
+
+
+def _insert_collection(engine: Engine, *, collection_id: str = "col-stuck") -> None:
+    import json as _json
+
+    from aperag.domains.knowledge_base.db.models import Collection
+
+    with Session(engine) as session, session.begin():
+        session.add(
+            Collection(
+                id=collection_id,
+                title="t",
+                user="u1",
+                type="document",
+                status="ACTIVE",
+                config=_json.dumps(
+                    {
+                        "enable_vector": True,
+                        "enable_fulltext": True,
+                        "enable_summary": True,
+                    }
+                ),
+            )
+        )
+
+
+def test_p4_reconciler_re_enqueues_stuck_document(engine):
+    """A document uploaded > cooldown ago with zero ``document_index``
+    rows must be re-enqueued onto ``q:parse`` so the parse worker
+    gets another chance.
+    """
+    _insert_collection(engine)
+    _insert_document(engine, document_id="doc-stuck", age=timedelta(minutes=10))
+
+    queue = InMemoryWorkQueue()
+    pushed = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,  # 60s cooldown — doc is 10 min old, easily past
+        )
+    )
+    assert pushed == 1
+    assert queue.parse_qsize() == 1
+
+
+def test_p4_reconciler_skips_documents_within_cooldown(engine):
+    """A freshly-uploaded document (within cooldown) must NOT be
+    re-enqueued — the parse worker may simply be in the middle of
+    its first run.
+    """
+    _insert_collection(engine)
+    _insert_document(engine, document_id="doc-fresh", age=timedelta(seconds=10))
+
+    queue = InMemoryWorkQueue()
+    pushed = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=300,  # 5 min — doc is 10s old
+        )
+    )
+    assert pushed == 0
+    assert queue.parse_qsize() == 0
+
+
+def test_p4_reconciler_skips_documents_with_existing_index_rows(engine):
+    """A document that already has ``document_index`` rows is being
+    successfully indexed; the reconciler must not interfere.
+    """
+    _insert_collection(engine)
+    _insert_document(engine, document_id="doc-indexing", age=timedelta(minutes=10))
+    pv = "indexpv00000000a"[:15] + "a"
+    with Session(engine) as session, session.begin():
+        session.execute(
+            insert(DocumentIndex).values(
+                document_id="doc-indexing",
+                parse_version=pv,
+                modality=Modality.VECTOR.value,
+                status=IndexStatus.PENDING.value,
+                tenant_scope_key="user:u1",
+                collection_id="col-stuck",
+                source_path=f"collections/col-stuck/documents/doc-indexing/derived/parse_{pv}/chunks.jsonl",
+                is_serving=False,
+            )
+        )
+
+    queue = InMemoryWorkQueue()
+    pushed = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,
+        )
+    )
+    assert pushed == 0
+    assert queue.parse_qsize() == 0
+
+
+def test_p4_reconciler_at_most_once_per_cooldown(engine):
+    """Two consecutive reconciler ticks must not re-push the same
+    stuck document — gmt_updated bumps after first push so the
+    cooldown predicate filters it out.
+    """
+    _insert_collection(engine)
+    _insert_document(engine, document_id="doc-bump", age=timedelta(minutes=10))
+
+    queue = InMemoryWorkQueue()
+    first = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,
+        )
+    )
+    assert first == 1
+    second = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,
+        )
+    )
+    assert second == 0, (
+        "second tick must not re-push within the cooldown window — Document.gmt_updated was bumped on the first push"
+    )
+    assert queue.parse_qsize() == 1
+
+
+def test_p4_reconciler_skips_documents_without_object_path(engine):
+    """A stuck document whose ``doc_metadata.object_path`` is missing
+    cannot be re-parsed — the parse worker has nothing to read.
+    The reconciler must skip it (operator must fix the upload).
+    """
+    _insert_collection(engine)
+    _insert_document(
+        engine,
+        document_id="doc-missing-path",
+        age=timedelta(minutes=10),
+        object_path=None,
+    )
+
+    queue = InMemoryWorkQueue()
+    pushed = asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,
+        )
+    )
+    assert pushed == 0
+
+
+def test_p4_reconciler_pushes_correct_payload_shape(engine):
+    """The payload pushed by the reconciler must match the upload
+    handler's contract (per ``ParseDispatchPayload`` shape) so the
+    parse worker sees identical data regardless of producer.
+    """
+    _insert_collection(engine)
+    _insert_document(
+        engine,
+        document_id="doc-payload",
+        collection_id="col-stuck",
+        age=timedelta(minutes=10),
+    )
+
+    queue = InMemoryWorkQueue()
+    asyncio.run(
+        reconcile_stuck_documents_for_parse_reenqueue(
+            engine=engine,
+            queue=queue,
+            cooldown_seconds=60,
+        )
+    )
+
+    payload = asyncio.run(queue.pop_parse(timeout_seconds=0.5))
+    assert payload is not None
+    assert payload["document_id"] == "doc-payload"
+    assert payload["collection_id"] == "col-stuck"
+    assert payload["object_path"] == "collections/col-stuck/documents/doc/source/upload.pdf"
+    assert payload["tenant_scope_key"] == "user:u1"
+    assert set(payload["modalities"]) == {"vector", "fulltext", "summary"}
+    assert payload["purge_existing_triples"] is True
diff --git a/tests/unit_test/indexing/test_t3_1_dispatcher_path_c.py b/tests/unit_test/indexing/test_t3_1_dispatcher_path_c.py
index 6461b0328..53c4f1c13 100644
--- a/tests/unit_test/indexing/test_t3_1_dispatcher_path_c.py
+++ b/tests/unit_test/indexing/test_t3_1_dispatcher_path_c.py
@@ -366,6 +366,7 @@ def test_path_c_handles_empty_input(engine):
         "graph_lineage_cleaned": 0,
         "rows_deleted": 0,
         "backend_skipped": 0,
+        "transient_deferred": 0,
         "collections_cleaned": 0,
     }
 

From b4b8e8840d0295d35f4b8ea60117655ef2eef2df Mon Sep 17 00:00:00 2001
From: Bryce <bryce@apecloud.com>
Date: Mon, 27 Apr 2026 18:13:48 +0800
Subject: [PATCH 04/13] =?UTF-8?q?docs(celery=20Wave=205=20P1=20chunk=203):?=
 =?UTF-8?q?=20=C2=A7K.9=20Phase=201=20scope=20narrow=20+=20=C2=A7K.10=20Wa?=
 =?UTF-8?q?ve=206=20backlog?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per architect Option (a) ruling 2026-04-27 (post-cascade scope check
msg=30c7e994 + ratify msg=8780c937):

* §K.9 Phase 1 narrowed scope: relocate-only (chunks 1+2 already
  shipped via `11113acb` + `e0707165`). Original Phase 1 scope (delete
  legacy `graphindex` package + migrate retrieval/curation callers)
  collided with unresolved design gap — legacy `GraphIndexService.
  query_context()` 24-method LightRAG-style API has no equivalent on
  the new 10-method `LineageGraphStore` Protocol; building that query
  layer is 1-2 week design + implementation effort, not in-scope for
  Wave 5 single-PR fast-landing style.

* §K.10 Wave 6 backlog added: full legacy graphindex elimination +
  retrieval/curation migration + new LightRAG-style query layer
  design moves to Wave 6 separate PR with its own architect ratify
  lane (per chunk 4d Option C ruling msg=b26f64b2 + Wave 4 §K.8
  precedent).

* Wave 5 acceptance summary updated: 15/16 items in Wave 5 PR
  (item 1 legacy graphindex elimination → Wave 6).

* Phase 1 close-out signal: chunk 1 (`11113acb`) +chunk 2 (`e0707165`)
  ship llm.py relocate + import re-route to new canonical home;
  legacy `graphindex/{integration,prompts}.py` keep deprecation-shim
  re-exports during the Wave 6 deprecation window so legacy
  retrieval/curation callers do not break mid-Wave-5.

This commit closes Phase 1 narrowed scope and unblocks Phase 2 (T7
multimodal vision-LLM 3-item bundle) per architect ruling.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../indexing-redesign-design-pack.md          | 73 +++++++++++++++----
 1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/docs/modularization/indexing-redesign-design-pack.md b/docs/modularization/indexing-redesign-design-pack.md
index 20d28b20e..e3fe8e17c 100644
--- a/docs/modularization/indexing-redesign-design-pack.md
+++ b/docs/modularization/indexing-redesign-design-pack.md
@@ -1620,27 +1620,33 @@ v2 把 v1 的 7 个细粒度 PR 收成 **3 个 wave**（≈3 个大 PR）。每
 
 **5-phase commit roadmap (16 acceptance items)**:
 
-#### Phase 1 (sequential first) — Legacy graphindex package 整体淘汰
+#### Phase 1 (narrowed scope per architect ruling 2026-04-27 post-cascade scope check) — `aperag/indexing/llm.py` relocate + import re-route
 
-- delete `aperag/domains/knowledge_graph/graphindex/{storage, service, integration, engine, __init__}.py`（per chunk 4d Option C ruling msg=b26f64b2 + §K.8 deferral）
-- migrate retrieval/curation 调用 → §G.5 read primitives:
-  - `aperag/domains/retrieval/pipeline.py:85` `make_service_for_collection`
-  - `aperag/domains/knowledge_graph/service.py:69+` graph CRUD
-  - `aperag/graph_curation/service.py:37` + `integration.py:21`
-  - `aperag/indexing/worker_factory.py:696` `build_collection_llm_callable`
-- relocate `build_collection_llm_callable` + `render_extraction_prompt` → 新 `aperag/indexing/llm.py`
-- delete tests: `tests/unit_test/graphindex/test_connector.py` / `test_nebula_store.py` / `tests/integration/compat/test_graph_compat.py`
+**Scope evolution** (per Bryce scope reality check msg=30c7e994 + architect ruling Option (a) adoption msg-post-2026-04-27-18:10):
 
-**production-readiness 三类 layer**:
-- must-be-real: §G.5 read primitives 真 wire retrieval/curation flow（not stubs）
-- may-be-gated: 无 (full hard-cut)
-- fully-resolves: §K.8 Wave 5 backlog item 1 (legacy graphindex elimination) + item 3 (`aperag/indexing/llm.py` relocate)
+The original Phase 1 scope (delete legacy ``graphindex/{storage, service, integration, engine, __init__}.py`` + migrate retrieval/curation callers to §G.5 read primitives) collided with an unresolved design gap: legacy ``GraphIndexService.query_context()`` is a 24-method LightRAG-style query API, while the new ``LineageGraphStore`` Protocol exposes only 4 read methods (``find_entity_ids_with_lineage`` / ``find_relation_keys_with_lineage`` / ``get_entity`` / ``get_relation``) bound to ``(document_id, parse_version)`` lineage scans + single-key fetches. There is no by-keyword / by-semantic / by-graph-traversal API surface on the new pipeline yet — that is itself a 1-2 week design + implementation effort (build LightRAG-style query layer on ``LineageGraphStore`` with vector recall + traversal).
+
+Per architect Option (a) ruling: **Phase 1 narrowed to relocate-only scope**. Full legacy ``graphindex`` package elimination and retrieval/curation cascade migration are deferred to **Wave 6** as a coordinated cross-cutting refactor PR alongside the new query layer design.
+
+**Phase 1 actual scope (Wave 5 chunks 1+2 already shipped)**:
+
+- relocate ``build_collection_llm_callable`` + ``render_extraction_prompt`` + ``ENTITY_RELATION_EXTRACTION`` template + ``LLMCall`` type alias → new ``aperag/indexing/llm.py`` (Wave 5 PR commit ``11113acb``)
+- legacy ``graphindex/integration.py`` and ``graphindex/prompts.py`` keep deprecation-shim re-exports during the Wave 6 deprecation window so legacy retrieval/curation callers do not break mid-Wave-5
+- migrate ``aperag/indexing/graph_extractor.py`` import to new location (Wave 5 PR commit ``11113acb``)
+- migrate ``aperag/service/prompt_template_service.py`` ``get_default_prompt(prompt_type="graph")`` import to new location (Wave 5 PR commit ``e0707165``)
+
+**production-readiness 三类 layer (revised)**:
+- must-be-real: ``aperag.indexing.llm`` is the canonical home for the relocated helpers; Wave 4 indexing pipeline (``graph_extractor`` + ``worker_factory`` chunk 4b graph dispatch) imports from new location, no transitive legacy dependency
+- may-be-gated: legacy ``graphindex/{integration,prompts}.py`` re-export shims are intentional Wave 6 deprecation-window devices, not gated defaults
+- fully-resolves: §K.8 Wave 5 backlog item 3 (``aperag/indexing/llm.py`` relocate). Wave 5 backlog item 1 (legacy graphindex package elimination) **deferred to Wave 6** — see §K.10.
+
+**Wave 6 entry condition** (per architect Option (a) ruling): a separate Wave 6 PR covers the legacy graphindex retrieval-side query layer migration. Phase 1 narrowed scope avoids the Wave 3 lesson #10 anti-pattern (silent regression by forcing a 1-week design refactor into a high-throughput PR) and follows the chunk 4d Option C precedent (narrowed scope + Wave N+1 cross-cutting refactor batch).
 
 **pre-check pattern** (per `feedback_spec_lock_grep_verify_caller.md` Pattern 1):
-- grep `from aperag.domains.knowledge_graph.graphindex` 全验 caller cascade migration completeness
-- grep-zero verify post-migration: `aperag/indexing/*` 不 import legacy graphindex (chunk 4d invariant maintained)
+- ``grep -rn "from aperag.domains.knowledge_graph.graphindex" aperag/indexing/`` → 0 matches (chunk 4d invariant maintained ✅)
+- legacy callers (``retrieval/pipeline.py`` / ``knowledge_graph/service.py`` / ``graph_curation/*``) **intentionally** still import from legacy modules pending Wave 6 migration
 
-**architect direct ratify scope**: caller migration completeness verify (post-Phase-1 grep-zero check); `aperag/indexing/llm.py` API surface align Wave 4 import path expectations。
+**architect direct ratify scope**: chunk 1 (``11113acb``) ratified ✅; chunk 2 (``e0707165``) huangheng pass-1 ✅. No further Phase 1 architect direct ratify gate needed in Wave 5 — the cascade chunks (3-7) move to Wave 6 with their own architect ratify lane (cascade caller migration spec + completeness verify).
 
 #### Phase 2 (post-Phase-1) — T7 multimodal vision-LLM 3-item bundle
 
@@ -1772,6 +1778,41 @@ PM (燧木) 决定。架构师建议参考 D10 模式：
 - 单 modality 写手（5 人 × Wave 1 一人一个 modality + Wave 2 worker / reconciler 等）
 - Bryce — graph modality（最复杂的那个，且修 nebula append bug）+ idempotency 自测把关
 
+### K.10. Wave 6 — Legacy graphindex elimination + retrieval/curation migration（per architect Option (a) ruling 2026-04-27 + Wave 5 Phase 1 scope-narrow handoff）
+
+**目标**: 完成 Wave 5 Phase 1 narrowed scope 推迟的 legacy ``graphindex`` package 整体淘汰 + retrieval/curation flows 迁移。这是 cross-cutting refactor，与 Wave 5 single-PR 风格不同 — 需先设计新查询层 (LightRAG-style query layer on ``LineageGraphStore``)，再 cascade migrate 调用方。
+
+**Wave 6 acceptance items** (per Wave 5 §K.9 scope-narrow handoff):
+
+1. **Design + implement LightRAG-style query layer** on ``LineageGraphStore`` Protocol:
+   - by-keyword entity / relation lookup
+   - vector-recall augmented retrieval (复用 §G.5 retrieval pipeline 的 vector connector)
+   - graph traversal API (1-hop neighbors, multi-hop expansion)
+   - query result types align with retrieval pipeline expectations
+2. **Migrate legacy ``graphindex`` callers to new query layer**:
+   - ``aperag/domains/retrieval/pipeline.py:_fulltext_search`` etc. — 用新 query API 替 ``GraphIndexService.query_context()``
+   - ``aperag/domains/knowledge_graph/service.py`` graph CRUD — 用新 LineageGraphStore find/get methods
+   - ``aperag/graph_curation/service.py`` + ``integration.py`` + ``candidate_generation.py`` — 用新 query API + Entity DTO relocation
+3. **Delete legacy package**:
+   - ``aperag/domains/knowledge_graph/graphindex/{storage, service, integration, engine, __init__}.py``
+   - ``graphindex/{config, dto, prompts, models}.py`` (DTOs may relocate to canonical home)
+4. **Delete legacy tests**:
+   - ``tests/unit_test/graphindex/test_connector.py``
+   - ``tests/unit_test/graphindex/test_nebula_store.py``
+   - ``tests/integration/compat/test_graph_compat.py``
+5. **Final grep-zero verify**: post-Wave-6, ``aperag/`` 全树无 ``from aperag.domains.knowledge_graph.graphindex`` import
+
+**production-readiness 三类 layer**:
+- must-be-real: 新 LightRAG-style query layer ships behavior-equivalent retrieval (existing graph search results within ε tolerance)
+- may-be-gated: 无 (full hard-cut)
+- fully-resolves: §K.8 Wave 5 backlog item 1 deferred (legacy graphindex elimination) + supplementary cascade items
+
+**effort estimate**: 1-2 weeks single-implementer or 0.5-1 week 2-implementer parallel (query layer design + migration + test rewrite + e2e behavior preservation verify)。
+
+**architect direct ratify lane**: full Wave 6 — query layer Protocol design + migration completeness verify + behavior-preservation validation against retrieval / curation existing behaviors。
+
+**why Wave 6 separate PR (not Wave 5 fold)**: Wave 5 PR (#1733) targets 16 polish items in single-PR fast-landing style. Wave 6 cross-cutting refactor needs (a) new design work upstream, (b) careful behavior validation against existing retrieval flows, (c) phased migration to avoid silent regression. Bundling into Wave 5 risked Wave 3 lesson #10 anti-pattern (rushed cross-cutting refactor in high-throughput PR → silent broken paths). Wave 6 separate PR with its own architect ratify lane is the disciplined path.
+
 ---
 
 ## §L. Private / on-premise deployment — "deploy-and-forget"

From 1abbaa1d061b61cfb76298bb4e5ff1d3eb1edd75 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:17:39 +0800
Subject: [PATCH 05/13] feat(celery Wave 5 P3): Layer 2 e2e fixture wiring +
 sweep D activation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 Phase 3 (task #28) — replaces the pre-Wave-5 ``pytest.skip``
stubs in ``test_full_indexing_pipeline.py`` Layer 2 with functional
test bodies that the e2e-http-compose CI lane can execute against
the live backend stack.

Per architect msg=fdd53586 + chunk 4d+4e ratify msg=c279a0ff —
Layer 2 contract was declared but skip-stub-only; this phase wires
the actual implementation so the canonical Phase 1 invariant runs
end-to-end whenever the operator opts in via ``RUN_E2E_PHASE1_SMOKE=1``.

Two test bodies wired:

* ``test_phase1_full_pipeline_vector_fulltext_summary_active_graph_vision_failed``
  — the canonical Phase 1 smoke (architect msg=da3012a4): real
  ``ProductionWorkerFactory`` + parse_document + dispatch_indexing
  + worker pool drive until every modality finalises. Asserts
  vector + fulltext + summary reach ACTIVE; graph + vision finalise
  FAILED with a gate marker (``Wave 4 wiring`` chunk 4b /
  ``completion model`` post-T1 self-disable / ``multimodal`` vision
  gate). The gate-marker OR tolerates the same-Wave T1 self-disable
  surface so the test stays green across the chunk 4b → T1 closure.

* ``test_phase1_multi_keyword_fulltext_search_returns_hits`` —
  sweep D verification (architect msg=fdd53586): index a real
  document via the worker pool, then issue a 3-keyword query
  through ``_fulltext_search`` and assert ≥1 hit. The retrieval-
  side ``minimum_should_match`` arithmetic over N×content +
  N×title should-clauses (huangheng msg=fb64468c flag) is the
  latent issue this exercises end-to-end.

New fixture machinery:

* ``_phase1_e2e_skip_reason()`` — central skip-reason resolver that
  documents the activation contract: ``RUN_E2E_PHASE1_SMOKE=1`` +
  ``PHASE1_E2E_COLLECTION_ID`` (pointing at a Collection seeded by
  the e2e-http-compose bootstrap with a real model provider
  configured) + backend env vars (``DATABASE_URL`` / ``ES_HOST`` /
  Qdrant + Redis env vars). Both Layer 2 tests share the same
  skip predicate so the lane runner only needs one env-var setup.

* ``_resolve_phase1_e2e_engine()`` — opens a real Postgres engine
  using ``settings.database_url``; skips with a clear reason if
  unset.

* ``_run_phase1_workers_until_quiet()`` — bounded async loop that
  drives ``ProductionWorkerFactory`` directly (mirroring the
  ``run_*_worker`` lifespan path) until every modality row reaches
  a terminal status. Bounded by ``timeout_seconds`` so a hung
  modality fails the test loud rather than blocking forever.

Three-class tag (Wave 3 production-readiness invariant):
* must-be-real: live ProductionWorkerFactory + real backends + real
  ES query
* may-be-gated: skips when env vars missing (local-dev never
  requires the full stack)
* fully-resolves: 2 of Wave 5 P1 backlog items (Layer 2 stubs
  activated for canonical full pipeline + sweep D multi-keyword
  smoke)

Cleanup roundtrip (delete document → cleanup loop → backend
artefacts gone) is left as a follow-up sub-test to land alongside
the e2e-http-compose lane document-delete API access scaffolding.

Local gates:
* ``pytest tests/integration/test_full_indexing_pipeline.py`` — 4
  passed / 2 skipped (Layer 2 skips cleanly with the new
  ``_phase1_e2e_skip_reason()`` until env vars set in CI lane).
* ``pytest tests/unit_test/indexing/ tests/integration/`` — 232
  passed / 48 skipped — no regression vs P4 baseline.
* ``ruff check aperag/ tests/integration/test_full_indexing_pipeline.py``
  — clean.
* ``ruff format`` — applied.

Branch: ``chenyexuan/celery-wave5-p4`` rebased on
``bryce/celery-wave5`` HEAD ``99af7965`` (Phase 1 chunk 3 spec
amend) → push to ``bryce/celery-wave5`` so Wave 5 single-PR pile
continues.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../test_full_indexing_pipeline.py            | 401 +++++++++++++++---
 1 file changed, 336 insertions(+), 65 deletions(-)

diff --git a/tests/integration/test_full_indexing_pipeline.py b/tests/integration/test_full_indexing_pipeline.py
index 8aed66061..6651d22a4 100644
--- a/tests/integration/test_full_indexing_pipeline.py
+++ b/tests/integration/test_full_indexing_pipeline.py
@@ -360,95 +360,366 @@ async def _run(_row: DocumentIndex = row, _modality: Modality = modality) -> Non
 # ---------------------------------------------------------------------
 # Layer 2 — full pipeline e2e (gated by RUN_E2E_PHASE1_SMOKE=1).
 # Real Postgres + Redis + Qdrant + Elasticsearch + OTel SDK.
+#
+# Wave 5 P3 (chenyexuan task #28) wires the Layer 2 fixture and
+# replaces the pre-Wave-5 ``pytest.skip`` stubs with functional test
+# bodies. The e2e-http-compose CI lane sets the env vars below; the
+# tests run against the live stack and validate the canonical Phase 1
+# contract end-to-end.
 # ---------------------------------------------------------------------
 
 
 _PHASE1_E2E_GATE = os.environ.get("RUN_E2E_PHASE1_SMOKE") == "1"
+_PHASE1_E2E_COLLECTION_ID = os.environ.get("PHASE1_E2E_COLLECTION_ID")
+
+
+def _phase1_e2e_skip_reason() -> str | None:
+    """Return ``None`` if all required env vars are set, else a
+    human-readable skip reason describing what is missing.
+
+    The Wave 5 P3 contract is that Layer 2 runs only when:
+    * ``RUN_E2E_PHASE1_SMOKE=1`` (operator opt-in)
+    * ``PHASE1_E2E_COLLECTION_ID`` points at a Collection seeded by
+      the e2e-http-compose lane bootstrap with a real model provider
+      configured (so the embedder + summariser actually work).
+    * Backend env vars set so :class:`ProductionWorkerFactory` can
+      resolve real clients (``DATABASE_URL`` / ``ES_HOST`` / Qdrant
+      env vars / ``INDEXING_QUEUE_REDIS_URL``).
+
+    Skipping with a clear reason beats failing — local-dev runs of
+    this file should not require operators to stand up the full
+    stack.
+    """
+    if not _PHASE1_E2E_GATE:
+        return (
+            "Phase 1 full e2e smoke gated on RUN_E2E_PHASE1_SMOKE=1 — needs "
+            "real Postgres + Redis + Qdrant + ES + a configured model "
+            "provider. Runs in the e2e-http-compose CI lane."
+        )
+    if not _PHASE1_E2E_COLLECTION_ID:
+        return (
+            "Phase 1 Layer 2 needs PHASE1_E2E_COLLECTION_ID env var "
+            "pointing at the e2e-http-compose-bootstrapped Collection "
+            "(real model provider configured). Set it from "
+            "tests/e2e_http/bootstrap/.generated/e2e.env."
+        )
+    return None
+
+
+def _resolve_phase1_e2e_engine() -> Engine:
+    """Open a real Postgres engine using the production ``settings``
+    so the test sees the same Collection rows the e2e-http-compose
+    lane seeded.
+
+    Mirrors :func:`aperag.config.sync_engine` but builds the engine
+    directly inside the test so a stale or pooled connection from a
+    prior pytest module does not leak into the suite.
+    """
+    from aperag.config import settings
+
+    if not settings.database_url:
+        pytest.skip("Phase 1 Layer 2: settings.database_url is empty (POSTGRES_HOST etc unset)")
+    return create_engine(settings.database_url, future=True)
+
+
+async def _run_phase1_workers_until_quiet(
+    *,
+    engine: Engine,
+    document_id: str,
+    timeout_seconds: float = 60.0,
+) -> dict[Modality, DocumentIndex]:
+    """Drive the production worker pool until every per-modality
+    ``DocumentIndex`` row for ``document_id`` finalises (ACTIVE or
+    FAILED — terminal states), or ``timeout_seconds`` elapses.
+
+    Returns a dict keyed by Modality with the final row state.
+
+    Implementation drives :class:`ProductionWorkerFactory` directly
+    against the live backends — same dispatch path the
+    ``run_*_worker`` lifespan tasks use. Each modality is processed
+    once per cycle until terminal; the loop is bounded by
+    ``timeout_seconds`` so a hung modality (e.g. unreachable Qdrant)
+    fails the test loud rather than blocking forever.
+    """
+    from sqlalchemy import select as sa_select
+
+    from aperag.indexing.orchestrator import process_one_task
+
+    factory = ProductionWorkerFactory(engine=engine)
+    deadline = asyncio.get_event_loop().time() + timeout_seconds
+    finalised: dict[Modality, DocumentIndex] = {}
+
+    while asyncio.get_event_loop().time() < deadline:
+        with Session(engine) as session:
+            rows = list(
+                session.execute(
+                    sa_select(DocumentIndex).where(DocumentIndex.document_id == document_id)
+                ).scalars()
+            )
+        if not rows:
+            await asyncio.sleep(0.1)
+            continue
+
+        all_terminal = True
+        for row in rows:
+            try:
+                modality = Modality(row.modality)
+            except ValueError:
+                continue
+            if row.status in (IndexStatus.ACTIVE.value, IndexStatus.FAILED.value):
+                finalised[modality] = row
+                continue
+            all_terminal = False
+            payload = DispatchPayload(
+                index_id=row.id,
+                document_id=row.document_id,
+                parse_version=row.parse_version,
+                modality=modality,
+                source_path=row.source_path or "",
+                collection_id=row.collection_id,
+            )
+            try:
+                worker = await factory(payload)
+            except WorkerFactoryError as exc:
+                # Gate-raise → finalise FAILED with the gate message.
+                # Mirrors :class:`run_worker_loop` path so Layer 2
+                # exercises the production pattern.
+                from aperag.indexing.orchestrator import _claim_row, _finalize_failed
+
+                if await asyncio.to_thread(_claim_row, engine, row.id):
+                    await asyncio.to_thread(
+                        _finalize_failed,
+                        engine,
+                        row.id,
+                        f"worker_factory failed: {exc!r}",
+                    )
+                continue
+            await process_one_task(
+                engine=engine,
+                payload=payload,
+                worker=worker,
+                heartbeat_interval_seconds=0,
+            )
+
+        if all_terminal:
+            break
+        await asyncio.sleep(0.2)
+
+    return finalised
 
 
 @pytest.mark.skipif(
-    not _PHASE1_E2E_GATE,
-    reason=(
-        "Phase 1 full e2e smoke gated on RUN_E2E_PHASE1_SMOKE=1 — needs "
-        "real Postgres + Redis + Qdrant + ES + multimodal-capable embedder. "
-        "Runs in the e2e-http-compose CI lane."
-    ),
+    _phase1_e2e_skip_reason() is not None,
+    reason=(_phase1_e2e_skip_reason() or "phase 1 layer 2 skipped"),
 )
 def test_phase1_full_pipeline_vector_fulltext_summary_active_graph_vision_failed():
     """Layer 2 — canonical Phase 1 e2e smoke per architect msg=da3012a4.
 
-    Sequence:
-    1. Spin up a real ``ProductionWorkerFactory`` against the live
-       backend stack (Postgres / Redis / Qdrant / ES / OTel SDK).
-    2. Upload a markdown document; orchestrator dispatches 5 modality
-       rows (vector / fulltext / summary / graph / vision).
-    3. Run the worker pool until all rows finalise.
-    4. Assert vector + fulltext + summary reach
-       ``status == "ACTIVE"`` (3 modality fully working in Phase 1).
-    5. Assert graph + vision finalise ``status == "FAILED"`` with
-       ``error_message`` containing ``"Wave 4 wiring"`` (gates remain
-       effective even with chunk 4b backend dispatch wired).
-    6. Delete the document; run cleanup loop; assert backend
-       artefacts removed (Qdrant point count == 0, ES doc count == 0,
-       lineage entity rows == 0) for the document_id.
-
-    This is the contract the e2e-http-compose CI lane enforces before
-    Wave 4 close-out (Phase 2 — after T1 + T7 land — flips graph +
-    vision rows to ACTIVE).
+    Wave 5 P3: implementation wired against the e2e-http-compose lane
+    fixture. Reads ``PHASE1_E2E_COLLECTION_ID`` to resolve the live
+    Collection, dispatches a markdown upload, drives the worker pool
+    until every modality row finalises, then asserts the canonical
+    Phase 1 contract:
+
+    * vector + fulltext + summary reach ``ACTIVE`` (three real
+      modalities ship in Wave 4)
+    * graph + vision finalise ``FAILED`` with ``error_message``
+      containing the gate marker (``Wave 4 wiring`` for chunk 4b
+      gates, ``completion model`` for the post-T1 gate self-disable
+      surface, or ``multimodal`` for the vision gate). The OR
+      tolerates the T1 gate-self-disable transition that ships in
+      this same Wave (per chunk 4b → T1 closure).
+
+    The cleanup roundtrip (delete document → cleanup loop → backend
+    artefacts gone) is left to a follow-up sub-test (`...
+    _and_cleanup_removes_backend_artefacts`) once the Layer 2
+    fixture supports document-delete API access.
     """
 
-    pytest.skip(
-        "Layer 2 implementation requires a stub model-provider fixture "
-        "that vector/fulltext/summary embedders can resolve; the "
-        "fixture lives in the e2e-http-compose lane scaffolding "
-        "(``tests/e2e_http/scripts/run_full.sh``). Track Wave 4 "
-        "close-out PR for the wired Layer 2 — current Layer 1 above "
-        "covers the gate invariants Phase 1 needs locked in CI today."
+
+    from aperag.indexing.dispatcher import DispatchRequest, IndexingMode, dispatch_indexing
+    from aperag.indexing.parser import ParseConfig, parse_document
+    from aperag.objectstore.base import get_object_store
+
+    collection_id = _PHASE1_E2E_COLLECTION_ID
+    assert collection_id, "skip should have caught missing env var"
+
+    document_id = "phase1-e2e-" + uuid.uuid4().hex[:8]
+    source_bytes = (
+        b"# Phase 1 e2e smoke\n\n"
+        b"This document exercises the canonical Phase 1 contract: "
+        b"vector + fulltext + summary reach ACTIVE; graph + vision "
+        b"finalise FAILED with the Wave 4 wiring gate message.\n"
     )
 
+    async def _run() -> None:
+        engine = _resolve_phase1_e2e_engine()
+        try:
+            object_store = get_object_store()
+            parsed = parse_document(
+                store=object_store,
+                collection_id=collection_id,
+                document_id=document_id,
+                source_bytes=source_bytes,
+                source_filename="phase1-smoke.md",
+                config=ParseConfig(),
+            )
+            from aperag.indexing.runtime import get_runtime
+
+            runtime = get_runtime()
+            assert runtime is not None and runtime.queue is not None, (
+                "Phase 1 Layer 2 requires a live IndexingRuntime — the e2e-http-compose "
+                "lane bootstraps it via FastAPI lifespan; ensure the test runs against "
+                "the live API process."
+            )
+            await dispatch_indexing(
+                engine=runtime.engine,
+                queue=runtime.queue,
+                workers=None,
+                request=DispatchRequest(
+                    collection_id=collection_id,
+                    document_id=document_id,
+                    parse_version=parsed.parse_version,
+                    source_path=parsed.chunks_path,
+                    tenant_scope_key="user:phase1-e2e",
+                    modalities=tuple(Modality),
+                ),
+                mode=IndexingMode.ASYNC,
+            )
+
+            # Drive the pool inline so the test does not depend on the
+            # lifespan worker tasks racing with the assertion.
+            finalised = await _run_phase1_workers_until_quiet(
+                engine=engine,
+                document_id=document_id,
+            )
+            assert set(finalised.keys()) == set(Modality), (
+                f"every modality must finalise within timeout; got {set(finalised.keys())}"
+            )
+
+            for modality in (Modality.VECTOR, Modality.FULLTEXT, Modality.SUMMARY):
+                row = finalised[modality]
+                assert row.status == IndexStatus.ACTIVE.value, (
+                    f"modality={modality.value} must finalise ACTIVE in Phase 1; "
+                    f"actual={row.status} error={row.error_message}"
+                )
+                assert row.is_serving is True
+
+            for modality in (Modality.GRAPH, Modality.VISION):
+                row = finalised[modality]
+                assert row.status == IndexStatus.FAILED.value, (
+                    f"modality={modality.value} must finalise FAILED until Wave 5 T7 lands; "
+                    f"actual={row.status}"
+                )
+                msg = row.error_message or ""
+                assert any(
+                    marker in msg
+                    for marker in ("Wave 4 wiring", "completion model", "multimodal")
+                ), f"modality={modality.value} FAILED message must surface a gate marker; got {msg!r}"
+        finally:
+            engine.dispose()
+
+    asyncio.run(_run())
+
 
 @pytest.mark.skipif(
-    not _PHASE1_E2E_GATE,
-    reason=(
-        "Phase 1 multi-keyword fulltext smoke gated on RUN_E2E_PHASE1_SMOKE=1 — "
-        "needs real Elasticsearch + a configured fulltext index. Runs in the "
-        "e2e-http-compose CI lane."
-    ),
+    _phase1_e2e_skip_reason() is not None,
+    reason=(_phase1_e2e_skip_reason() or "phase 1 layer 2 skipped"),
 )
 def test_phase1_multi_keyword_fulltext_search_returns_hits():
     """Layer 2 — sweep D verification (architect msg=fdd53586): exercise
-    ``_fulltext_search`` with a multi-keyword query against a freshly
-    indexed document and assert at least one hit. The retrieval-side
-    ``minimum_should_match`` arithmetic over N×content + N×title should
-    clauses (huangheng msg=fb64468c flag) is a latent issue per
-    architect msg=2721a5e7 final review concern D.
-
-    Real-world verification beats algebraic pre-fix — this case runs the
-    actual ES query semantics against a real ES instance with a real
-    indexed document. If it passes, the latent risk did not materialise
-    in production semantics. If it fails, fix-forward in chunk 4e (or
-    escalate if the fix scope outgrows chunk 4e expectations) per
-    architect msg=fdd53586 ruling.
-
-    Sequence:
-    1. Upload a markdown document with content "ApeRAG combines vector
-       search and graph retrieval for production RAG workloads.".
-    2. Wait for fulltext modality to reach ACTIVE.
-    3. Issue a 3-keyword query: ``"ApeRAG vector RAG"``.
-    4. Assert at least 1 hit is returned (the indexed document).
-    5. Cleanup.
+    the retrieval-side ``_fulltext_search`` with a multi-keyword query
+    against a freshly indexed document. Asserts at least one hit so
+    the ``minimum_should_match`` arithmetic over N×content + N×title
+    should-clauses (huangheng msg=fb64468c flag) is verified end-to-end.
+
+    Wave 5 P3 wires this against the live ES instance the
+    e2e-http-compose lane provides. Re-uses the canonical Layer 2
+    fixture (``PHASE1_E2E_COLLECTION_ID``).
     """
 
-    pytest.skip(
-        "Sweep D multi-keyword fulltext smoke implementation requires the "
-        "same e2e-http-compose lane scaffolding as the canonical Layer 2 "
-        "test above (real ES instance + retrieval pipeline + indexed "
-        "document fixture). Track Wave 4 close-out PR for the wired "
-        "implementation — until then, sweep D is verified via the "
-        "tests/integration/test_fulltext_roundtrip_fields.py path which "
-        "exercises bulk_index + search round-trip on a real ES index."
+    from aperag.domains.retrieval.pipeline import _fulltext_search
+    from aperag.indexing.dispatcher import DispatchRequest, IndexingMode, dispatch_indexing
+    from aperag.indexing.parser import ParseConfig, parse_document
+    from aperag.objectstore.base import get_object_store
+
+    collection_id = _PHASE1_E2E_COLLECTION_ID
+    assert collection_id, "skip should have caught missing env var"
+    document_id = "phase1-sweepd-" + uuid.uuid4().hex[:8]
+    source_bytes = (
+        b"# Sweep D fulltext multi-keyword smoke\n\n"
+        b"ApeRAG combines vector search and graph retrieval for production RAG workloads.\n"
     )
 
+    async def _run() -> None:
+        engine = _resolve_phase1_e2e_engine()
+        try:
+            from aperag.indexing.runtime import get_runtime
+
+            runtime = get_runtime()
+            assert runtime is not None and runtime.queue is not None, (
+                "sweep D Layer 2 requires a live IndexingRuntime"
+            )
+
+            object_store = get_object_store()
+            parsed = parse_document(
+                store=object_store,
+                collection_id=collection_id,
+                document_id=document_id,
+                source_bytes=source_bytes,
+                source_filename="phase1-sweepd.md",
+                config=ParseConfig(),
+            )
+            await dispatch_indexing(
+                engine=runtime.engine,
+                queue=runtime.queue,
+                workers=None,
+                request=DispatchRequest(
+                    collection_id=collection_id,
+                    document_id=document_id,
+                    parse_version=parsed.parse_version,
+                    source_path=parsed.chunks_path,
+                    tenant_scope_key="user:phase1-sweepd",
+                    modalities=(Modality.FULLTEXT,),
+                ),
+                mode=IndexingMode.ASYNC,
+            )
+            finalised = await _run_phase1_workers_until_quiet(
+                engine=engine,
+                document_id=document_id,
+            )
+            row = finalised.get(Modality.FULLTEXT)
+            assert row is not None and row.status == IndexStatus.ACTIVE.value, (
+                f"fulltext modality must finalise ACTIVE; got {row}"
+            )
+
+            # Now exercise the multi-keyword path. ``_fulltext_search``
+            # is the retrieval pipeline entry the chat path consumes;
+            # if its ``minimum_should_match: 80%`` over a 2N-clause
+            # should-set has the latent calc gap huangheng flagged in
+            # msg=fb64468c, this assertion fails — which is the whole
+            # point of sweep D verification.
+            from aperag.domains.knowledge_base.db.models import Collection
+
+            with Session(engine) as session:
+                collection = session.get(Collection, collection_id)
+            assert collection is not None
+            hits = await _fulltext_search(
+                collection=collection,
+                query="ApeRAG vector RAG",
+                top_k=5,
+                user_id="user:phase1-sweepd",
+                chat_id=None,
+            )
+            assert hits and len(hits) >= 1, (
+                "multi-keyword fulltext search returned 0 hits — sweep D latent issue "
+                "may have materialised; check _fulltext_search:350 minimum_should_match calc."
+            )
+        finally:
+            engine.dispose()
+
+    asyncio.run(_run())
+
 
 # ---------------------------------------------------------------------
 # Type-checker happy: ``Any`` is imported for future Layer 2 stubs.

From 75cd8bd9ac2133e8a0b08547b79d96b611ab1799 Mon Sep 17 00:00:00 2001
From: Bryce <bryce@apecloud.com>
Date: Mon, 27 Apr 2026 18:18:02 +0800
Subject: [PATCH 06/13] feat(celery Wave 5 P2 chunk 1):
 EmbeddingService.embed_image API surface (T7 item 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 task #27 chunk 1 per §G.2.5.1 amendment item 1:
``EmbeddingService.embed_image(image_bytes, alt_text)`` API surface
is the canonical multimodal embedding entry point that replaces the
Wave 3 placeholder ``embed_query(f"{image_id}|{alt_text}")`` string-
concat (Wave 3 lesson #10 broken pattern).

Behavior:

* When ``self.multimodal=True`` (operator wired a multimodal embedder
  on the collection's spec), encode image bytes as base64 data URL
  + forward to LiteLLM ``embedding(input=[{"image_url":{"url":...}}, ...])``
  shape that multimodal-capable providers (Voyage / Jina v3 / OpenAI
  multimodal / etc.) accept natively.
* When ``self.multimodal=False``, raise ``EmbeddingError`` with clear
  operator-facing diagnostic — chunk 4b vision gate already prevents
  this state but runtime check is defense-in-depth (Wave 3 lesson #10
  ship-incomplete-but-don't-silently-lie).
* ``alt_text`` paired into LiteLLM input as a textual hint for
  embedders that accept multi-part inputs; image-only embedders
  silently drop the text element.

The ``aembed_image`` async wrapper mirrors the ``aembed_query``
pattern (``asyncio.to_thread`` since the LiteLLM call is sync).

Wave 5 P2 chunk-1 scope (this commit):

* declares the API surface so Phase 2 chunks 2 (parser image
  extraction) + 3 (provider v3 capability flag UI) and the chunk 4b
  vision gate self-disable path have a concrete contract to wire to
* uses imghdr-based MIME detection + LiteLLM's documented multimodal
  input shape; provider-specific input format variations defer to
  Wave 6 (per §K.10 Wave 6 backlog cross-cutting refactor)

Local gates green:
  - ``ruff check ./aperag ./tests`` clean
  - ``ruff format --check ./aperag ./tests`` clean (510 files)
  - ``pytest`` 179 passed / 2 skipped — no regressions on existing
    EmbeddingService callers (text-only path unchanged)

Production-readiness three-class layer:
  - must-be-real: real LiteLLM multimodal embedding call against the
    operator-configured provider when ``multimodal=True``
  - may-be-gated: provider-specific input-format adjustments (Voyage
    vs Jina vs OpenAI multimodal differences) deferred to Wave 6
  - fully-resolves: §G.2.5.1 item 1 (``embed_image`` API surface) +
    §K.9 Phase 2 partial — items 2+3 (parser image extraction +
    provider v3 capability flag UI) follow in subsequent commits

Next Phase 2 chunks:
- chunk 2: parser image extraction → ``derived/parse_<v>/vision/images/<image_id>.<ext>``
- chunk 3: provider v3 router multimodal capability flag UI exposure
- chunk 4: ``worker_factory._build_vision_worker`` ``_embed`` callsite
  rewrite to use ``embed_image`` + chunk 4b vision gate self-disable
  verify (Phase 1 Layer 1 test rename)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/llm/embed/embedding_service.py | 109 +++++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 1 deletion(-)

diff --git a/aperag/llm/embed/embedding_service.py b/aperag/llm/embed/embedding_service.py
index e225fd808..64162ce87 100644
--- a/aperag/llm/embed/embedding_service.py
+++ b/aperag/llm/embed/embedding_service.py
@@ -18,7 +18,7 @@
 import hashlib
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Dict, List, Sequence, Tuple
+from typing import Any, Dict, List, Sequence, Tuple
 
 import litellm
 
@@ -153,6 +153,113 @@ async def aembed_query(self, content: str) -> List[float]:
     def is_multimodal(self) -> bool:
         return self.multimodal
 
+    def embed_image(self, image_bytes: bytes, alt_text: str = "") -> List[float]:
+        """Embed image bytes into a vector via the configured multimodal model.
+
+        Wave 5 Phase 2 T7 item 1 per §G.2.5.1 amendment: the canonical
+        multimodal embedding API surface that the new vision modality
+        consumes (replacing the Wave 3 placeholder ``embed_query(f"{image_id}|
+        {alt_text}")`` string-concat pattern). Operators wire a real
+        multimodal embedder (CLIP / GPT-4V / etc.) on the collection's
+        embedder spec and set ``multimodal=True``; the chunk 4b vision
+        gate (``is_multimodal()`` check) then self-disables and the
+        worker_factory ``_build_vision_worker._embed`` callsite invokes
+        this method with real image bytes.
+
+        Encodes the image as a base64 data URL and forwards it to the
+        underlying LiteLLM-shaped embedding call via ``input=[{
+        "image_url": {"url": "data:image/...;base64,..."}}]`` — providers
+        that support multimodal embedding (Voyage AI, Jina v3, OpenAI
+        multimodal embeddings, etc.) accept this shape natively. If
+        ``multimodal=False`` (operator opted into vision without
+        configuring a multimodal embedder), raises ``EmbeddingError`` —
+        the worker_factory chunk 4b gate already prevents this state but
+        the runtime check is a defense-in-depth that surfaces clear
+        diagnostics if the gate is bypassed.
+
+        ``alt_text`` is appended as a textual hint for embedders that
+        accept paired text+image inputs (improves retrieval recall on
+        images with meaningful captions / OCR'd alt-text). Embedders
+        that ignore it (image-only embedders) silently drop it.
+
+        Wave 6 follow-up: provider-specific multimodal embedding
+        configuration (per-provider input format, MIME-type detection,
+        size limits) is staged in the cross-cutting refactor batch (per
+        §K.10 Wave 6 backlog). Until then, deployments that configure a
+        multimodal embedder must verify their provider accepts the
+        ``input=[{"image_url": {"url": "data:..."}}]`` shape (LiteLLM-
+        documented multimodal-capable providers).
+        """
+        if not self.multimodal:
+            raise EmbeddingError(
+                "embed_image called on a non-multimodal EmbeddingService — "
+                "set multimodal=True on the collection's embedder spec or "
+                "set collection.config.enable_vision=false until a real "
+                "multimodal embedder is configured (Wave 5 P2 §G.2.5.1)"
+            )
+        if not image_bytes:
+            raise EmptyTextError(1)
+        try:
+            return self._embed_image_via_litellm(image_bytes=image_bytes, alt_text=alt_text)
+        except (EmptyTextError, EmbeddingError):
+            raise
+        except Exception as e:
+            logger.error(f"Image embedding failed: {str(e)}")
+            raise wrap_litellm_error(e, "embedding", self.embedding_provider, self.model) from e
+
+    async def aembed_image(self, image_bytes: bytes, alt_text: str = "") -> List[float]:
+        return await asyncio.to_thread(self.embed_image, image_bytes, alt_text)
+
+    def _embed_image_via_litellm(self, *, image_bytes: bytes, alt_text: str) -> List[float]:
+        """Underlying LiteLLM multimodal embedding call.
+
+        Encodes the image as base64 data URL + builds the LiteLLM
+        ``input`` payload. Provider-specific input shape variations are
+        Wave 6 follow-up (per §K.10 Wave 6 backlog cross-cutting
+        refactor). Currently uses the documented LiteLLM-shaped
+        ``[{"image_url": {"url": "data:..."}}]`` input that
+        multimodal-capable providers (Voyage / Jina v3 / OpenAI multi-
+        modal / etc.) accept natively.
+        """
+        import base64
+        import imghdr
+
+        from litellm import embedding as litellm_embedding
+
+        # Detect MIME type from the image bytes header (avoids relying
+        # on caller-provided alt_text format hints). Falls back to
+        # image/jpeg if detection fails — most providers tolerate
+        # mismatched MIME on data-URL inputs.
+        kind = imghdr.what(None, h=image_bytes) or "jpeg"
+        mime = f"image/{kind}"
+        b64 = base64.b64encode(image_bytes).decode("ascii")
+        data_url = f"data:{mime};base64,{b64}"
+
+        input_payload: List[dict[str, Any]] = [{"image_url": {"url": data_url}}]
+        if alt_text and alt_text.strip():
+            # Pair the image with text for embedders that accept multi-
+            # part inputs; embedders that ignore text simply drop it.
+            input_payload.append({"text": alt_text.strip()})
+
+        response = litellm_embedding(
+            model=f"{self.embedding_provider}/{self.model}" if self.embedding_provider else self.model,
+            input=input_payload,
+            api_key=self.api_key,
+            api_base=self.base_url,
+        )
+        # LiteLLM normalises response shape to OpenAI-style; pull the
+        # embedding from the first (and only) data element.
+        data = getattr(response, "data", None) or response.get("data")  # type: ignore[union-attr]
+        if not data:
+            raise EmbeddingError("multimodal embedding response missing data field")
+        first = data[0]
+        embedding = getattr(first, "embedding", None)
+        if embedding is None and isinstance(first, dict):
+            embedding = first.get("embedding")
+        if embedding is None:
+            raise EmbeddingError("multimodal embedding response missing embedding field")
+        return list(embedding)
+
     def _embed_batch_with_indices(self, batch: Sequence[str], start_idx: int) -> List[Tuple[int, List[float]]]:
         """Process a batch of texts and return embeddings with their original indices."""
         try:

From 55729ac70771dcc51c8aea21940cdb5d4c000b62 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:25:29 +0800
Subject: [PATCH 07/13] feat(celery Wave 5 P5A item 1): per-collection knobs
 for T1 graph extractor caps + timeout

Per huangheng T1 obs A msg=6b349693: surface the LightRAG-style
extractor's per-chunk caps and LLM timeout as collection-level
overrides (`KnowledgeGraphConfig.{max_entities_per_chunk,
max_relations_per_chunk, per_chunk_timeout_seconds}`) so deployments
tuning a slow LLM provider or extracting from entity-dense documents
can lift the defaults without patching `aperag/indexing/graph_extractor.py`
constants.

* `aperag/schema/common.py`: 3 new optional fields on KnowledgeGraphConfig.
* `aperag/indexing/graph_extractor.py`: `_resolve_int_kg_config` /
  `_resolve_float_kg_config` helpers (mirror `_resolve_entity_types`
  pydantic-attr / Mapping / JSON-string tolerance pattern + reject
  non-positive / non-numeric values with warning + default fallback);
  `_extract_one_chunk` now takes `timeout_seconds` kwarg and
  `asyncio.wait_for` wires it instead of the global constant.
* `tests/integration/test_graph_extractor.py`: 6 new unit tests pinning
  override-wins / fallback-on-missing / fallback-on-non-positive /
  int-coerced-to-float / fallback-on-garbage; fixed pre-existing
  `_extract_one_chunk` call to pass `timeout_seconds=60.0`.

Defaults unchanged (32 / 32 / 60.0); collections that don't set the
new fields keep current behavior.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/indexing/graph_extractor.py        | 83 ++++++++++++++++++++++-
 aperag/schema/common.py                   | 29 ++++++++
 tests/integration/test_graph_extractor.py | 60 ++++++++++++++++
 3 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/aperag/indexing/graph_extractor.py b/aperag/indexing/graph_extractor.py
index c7bd21dfd..6b946711b 100644
--- a/aperag/indexing/graph_extractor.py
+++ b/aperag/indexing/graph_extractor.py
@@ -114,8 +114,11 @@ def build_collection_graph_extractor(collection: Any) -> GraphExtractor:
 
     entity_types = tuple(_resolve_entity_types(collection))
     language = _resolve_language(collection)
-    max_entities = _DEFAULT_MAX_ENTITIES_PER_CHUNK
-    max_relations = _DEFAULT_MAX_RELATIONS_PER_CHUNK
+    max_entities = _resolve_int_kg_config(collection, "max_entities_per_chunk", _DEFAULT_MAX_ENTITIES_PER_CHUNK)
+    max_relations = _resolve_int_kg_config(collection, "max_relations_per_chunk", _DEFAULT_MAX_RELATIONS_PER_CHUNK)
+    per_chunk_timeout = _resolve_float_kg_config(
+        collection, "per_chunk_timeout_seconds", _DEFAULT_PER_CHUNK_TIMEOUT_SECONDS
+    )
 
     async def _extractor(chunks: Sequence[dict[str, Any]]) -> tuple[list[EntityRecord], list[RelationRecord]]:
         """Run the LLM extractor over every chunk in the dispatch."""
@@ -138,6 +141,7 @@ async def _extractor(chunks: Sequence[dict[str, Any]]) -> tuple[list[EntityRecor
                     language=language,
                     max_entities=max_entities,
                     max_relations=max_relations,
+                    timeout_seconds=per_chunk_timeout,
                 )
             except Exception:  # noqa: BLE001 — per-chunk failure isolation
                 logger.exception(
@@ -168,6 +172,7 @@ async def _extract_one_chunk(
     language: str,
     max_entities: int,
     max_relations: int,
+    timeout_seconds: float,
 ) -> tuple[list[EntityRecord], list[RelationRecord]]:
     """Single-chunk extraction: render the prompt, call the LLM, parse
     the JSON response, return record lists.
@@ -186,7 +191,7 @@ async def _extract_one_chunk(
         max_entities=max_entities,
         max_relations=max_relations,
     )
-    raw = await asyncio.wait_for(llm(prompt), timeout=_DEFAULT_PER_CHUNK_TIMEOUT_SECONDS)
+    raw = await asyncio.wait_for(llm(prompt), timeout=timeout_seconds)
     return _parse_extraction_response(raw=raw, chunk_id=chunk_id)
 
 
@@ -335,6 +340,78 @@ def _resolve_language(collection: Any) -> str:
     return _DEFAULT_LANGUAGE
 
 
+def _resolve_kg_config_value(collection: Any, field: str) -> Any:
+    """Read ``collection.config.knowledge_graph_config.<field>`` tolerating
+    the pydantic-attr / Mapping / JSON-string shapes the DB row may take.
+    Returns ``None`` if any layer is missing — callers fall back to their
+    default constant."""
+    cfg = _resolve_config(collection)
+    if cfg is None:
+        return None
+    kg_config: Any = None
+    if hasattr(cfg, "knowledge_graph_config"):
+        kg_config = cfg.knowledge_graph_config
+    elif isinstance(cfg, Mapping):
+        kg_config = cfg.get("knowledge_graph_config")
+    if kg_config is None:
+        return None
+    if hasattr(kg_config, field):
+        return getattr(kg_config, field)
+    if isinstance(kg_config, Mapping):
+        return kg_config.get(field)
+    return None
+
+
+def _resolve_int_kg_config(collection: Any, field: str, default: int) -> int:
+    raw = _resolve_kg_config_value(collection, field)
+    if raw is None:
+        return default
+    try:
+        value = int(raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "graph extractor: knowledge_graph_config.%s=%r is not an int; falling back to default %d",
+            field,
+            raw,
+            default,
+        )
+        return default
+    if value <= 0:
+        logger.warning(
+            "graph extractor: knowledge_graph_config.%s=%d must be positive; falling back to default %d",
+            field,
+            value,
+            default,
+        )
+        return default
+    return value
+
+
+def _resolve_float_kg_config(collection: Any, field: str, default: float) -> float:
+    raw = _resolve_kg_config_value(collection, field)
+    if raw is None:
+        return default
+    try:
+        value = float(raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "graph extractor: knowledge_graph_config.%s=%r is not a float; falling back to default %s",
+            field,
+            raw,
+            default,
+        )
+        return default
+    if value <= 0:
+        logger.warning(
+            "graph extractor: knowledge_graph_config.%s=%s must be positive; falling back to default %s",
+            field,
+            value,
+            default,
+        )
+        return default
+    return value
+
+
 def _resolve_config(collection: Any) -> Any:
     cfg = getattr(collection, "config", None)
     if cfg is None:
diff --git a/aperag/schema/common.py b/aperag/schema/common.py
index 9eab01598..04d669e61 100644
--- a/aperag/schema/common.py
+++ b/aperag/schema/common.py
@@ -144,6 +144,35 @@ class KnowledgeGraphConfig(BaseModel):
         description="List of entity types to extract during graph indexing",
         examples=[["organization", "person", "geo", "event"]],
     )
+    # Wave 5 P5A item 1 (per huangheng T1 obs A msg=6b349693): expose
+    # the LightRAG-style extractor's per-chunk caps + timeout as
+    # per-collection overrides so deployments tuning a slow LLM provider
+    # or extracting from very dense chunks can lift the defaults without
+    # patching ``aperag/indexing/graph_extractor.py`` constants.
+    per_chunk_timeout_seconds: Optional[float] = Field(
+        None,
+        description=(
+            "Per-chunk LLM call timeout (seconds); the extractor uses 60s "
+            "default if unset. Lift for slow / large-context multimodal models."
+        ),
+        examples=[120.0],
+    )
+    max_entities_per_chunk: Optional[int] = Field(
+        None,
+        description=(
+            "Cap on entities the extractor accepts per chunk; default 32 "
+            "if unset. Raise for entity-dense documents (legal / scientific)."
+        ),
+        examples=[64],
+    )
+    max_relations_per_chunk: Optional[int] = Field(
+        None,
+        description=(
+            "Cap on relations the extractor accepts per chunk; default 32 "
+            "if unset. Raise alongside max_entities for relation-dense docs."
+        ),
+        examples=[64],
+    )
 
 
 class IndexPrompts(BaseModel):
diff --git a/tests/integration/test_graph_extractor.py b/tests/integration/test_graph_extractor.py
index 536cb32ea..4ba7ff744 100644
--- a/tests/integration/test_graph_extractor.py
+++ b/tests/integration/test_graph_extractor.py
@@ -154,6 +154,65 @@ class _Stub:
     assert ge._resolve_language(_Stub()) == "ja-JP"
 
 
+def test_resolve_int_kg_config_reads_override():
+    """Wave 5 P5A item 1: per-collection ``max_entities_per_chunk`` /
+    ``max_relations_per_chunk`` overrides should win over the module
+    defaults so deployments tune entity-dense documents without
+    patching constants."""
+
+    class _Stub:
+        config = {
+            "knowledge_graph_config": {
+                "max_entities_per_chunk": 64,
+                "max_relations_per_chunk": 128,
+            }
+        }
+
+    assert ge._resolve_int_kg_config(_Stub(), "max_entities_per_chunk", 32) == 64
+    assert ge._resolve_int_kg_config(_Stub(), "max_relations_per_chunk", 32) == 128
+
+
+def test_resolve_int_kg_config_falls_back_when_missing():
+    class _Stub:
+        config = {"knowledge_graph_config": {}}
+
+    assert ge._resolve_int_kg_config(_Stub(), "max_entities_per_chunk", 32) == 32
+
+
+def test_resolve_int_kg_config_rejects_non_positive():
+    class _Stub:
+        config = {"knowledge_graph_config": {"max_entities_per_chunk": 0}}
+
+    assert ge._resolve_int_kg_config(_Stub(), "max_entities_per_chunk", 32) == 32
+
+
+def test_resolve_float_kg_config_reads_override():
+    """``per_chunk_timeout_seconds`` override lifts the 60s default for
+    slow / large-context multimodal models per huangheng T1 obs A."""
+
+    class _Stub:
+        config = {"knowledge_graph_config": {"per_chunk_timeout_seconds": 180.0}}
+
+    assert ge._resolve_float_kg_config(_Stub(), "per_chunk_timeout_seconds", 60.0) == 180.0
+
+
+def test_resolve_float_kg_config_handles_int_value():
+    """JSON / pydantic may surface integers where floats are expected;
+    the resolver must coerce them rather than fall back."""
+
+    class _Stub:
+        config = {"knowledge_graph_config": {"per_chunk_timeout_seconds": 120}}
+
+    assert ge._resolve_float_kg_config(_Stub(), "per_chunk_timeout_seconds", 60.0) == 120.0
+
+
+def test_resolve_float_kg_config_falls_back_on_garbage():
+    class _Stub:
+        config = {"knowledge_graph_config": {"per_chunk_timeout_seconds": "fast"}}
+
+    assert ge._resolve_float_kg_config(_Stub(), "per_chunk_timeout_seconds", 60.0) == 60.0
+
+
 def test_extractor_skips_chunks_with_empty_text():
     """A chunks dict missing or with empty ``text`` should be skipped
     silently — the LLM isn't called for it. Pin via an extractor
@@ -190,6 +249,7 @@ async def _run() -> None:
                 language="en-US",
                 max_entities=8,
                 max_relations=8,
+                timeout_seconds=60.0,
             )
 
         assert len(calls) == 1, "LLM should be called only once (for the non-empty chunk)"

From 76bd9744d35bcf669c29c3c49ee8864e305294f0 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:26:22 +0800
Subject: [PATCH 08/13] =?UTF-8?q?feat(celery=20Wave=205=20P5B):=20P2=20bat?=
 =?UTF-8?q?ch=20=E2=80=94=20utc=5Fnow=20unify=20+=20tenant=20org-prefix=20?=
 =?UTF-8?q?forward-compat=20+=20OTLP=20config=20cross-check=20+=20cleanup?=
 =?UTF-8?q?=20builder=20share?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wave 5 Phase 5B (task #31) — 4 polish items from the Wave 4 ratify
trail accumulated obs (chunk_id schema unify deferred to a follow-up
once parser canonical schema lock lands):

* **P5B-A utc_now → CURRENT_TIMESTAMP unify** (per huangheng chunk 4a
  obs A): ``_LineageEntityRow`` + ``_LineageRelationRow``
  ``gmt_created`` / ``gmt_updated`` columns now use
  ``server_default=text("CURRENT_TIMESTAMP")`` mirroring the alembic
  migration declaration. Pre-Wave-5 ORM used ``default=utc_now``
  Python-side; alembic check passed because Postgres treats both as
  semantic-equivalent but the per-mirror discipline is stronger when
  both layers speak the same dialect (schema-touching follow-ups
  cannot drift undetected).

* **P5B-B tenant_scope_key org-prefix forward-compat** (per T3 chunk
  2 obs C + §H.2 forward-compat lock): new
  ``aperag.indexing.parse_orchestrator.resolve_tenant_scope_key``
  helper centralises the prefix construction. Pre-Wave-5 the prefix
  was hard-coded as ``f"user:{document.user}"`` at every callsite
  (upload handler, reconciler stuck-doc re-enqueue); the helper
  unifies the construction so a future Wave 6/7 organisation-tenant
  rollout flips one place. The org branch stays inert for Wave 5
  (Document/Collection schemas don't carry org_id yet) — the helper
  just makes the seam explicit.

* **P5B-C OTLP config cross-check** (per huangheng T6 chunk 1 obs):
  lifespan startup logs a clear warning when
  ``INDEXING_METRICS_EMITTER=otlp`` but
  ``APERAG_OBSERVABILITY_MODE`` is not set to ``otlp`` /
  ``collector``. Pre-Wave-5 this combination silently produced an
  ``OTLPMetricsEmitter`` whose ``MeterProvider`` was never installed
  by ``aperag.observability.metrics.init_metrics_provider`` —
  samples no-op'd silently, defeating the explicit
  ``INDEXING_METRICS_EMITTER=otlp`` opt-in.

* **P5B-D cleanup builder share helper** (per huangheng T2 obs B
  drift risk): new ``_build_collection_qdrant_connector(collection,
  *, allow_vector_size_fallback)`` shared helper used by
  ``_build_vector_worker`` / ``_build_summary_worker`` (dispatch,
  ``allow_vector_size_fallback=False``) AND
  ``_build_qdrant_cleanup_backend`` (cleanup,
  ``allow_vector_size_fallback=True``). Pre-Wave-5 dispatch and
  cleanup paths duplicated the embedder + connector wiring; a
  future Qdrant adapter signature change had to be applied twice.
  ``_build_vision_worker`` keeps its pre-helper shape — its
  ``is_multimodal()`` gate must fire BEFORE any network call to
  preserve the Wave 4 chunk 4b "fail fast on non-multimodal embedder"
  invariant the existing test pins.

Three-class tag (Wave 3 production-readiness invariant):
* must-be-real: 4 polish items each tighten a real production seam
  (alembic mirror discipline / org-prefix forward-compat / OTLP
  config consistency / cleanup-vs-dispatch drift surface)
* may-be-gated: org-prefix branch stays inert until org_id columns
  land (declared in helper docstring)
* fully-resolves: 4 of the 5 P5B chenyexuan-batch backlog items.
  ``chunk_id`` schema unify is left to a follow-up — parser
  canonical schema lock first needs the §C.x amend; touching the
  ``or chunk.get("id")`` fallback now without that lock would
  introduce drift in the opposite direction.

Deltas:
* ``aperag/indexing/graph_storage/postgres.py`` —
  ``_LineageEntityRow`` + ``_LineageRelationRow`` switched to
  ``server_default=CURRENT_TIMESTAMP``; ``utc_now`` import dropped.
* ``aperag/indexing/parse_orchestrator.py`` —
  ``resolve_tenant_scope_key(*, document, collection)`` helper +
  re-export.
* ``aperag/domains/knowledge_base/service/document_service.py`` —
  ``_create_or_update_document_indexes`` calls
  ``resolve_tenant_scope_key`` in place of the inline
  ``f"user:{document.user}"``.
* ``aperag/indexing/reconciler.py`` —
  ``_build_parse_payload_for_document`` calls the same helper for
  the stuck-document re-enqueue producer (consistency with upload
  handler).
* ``aperag/app.py`` — lifespan logs the OTLP-vs-observability-mode
  cross-check warning before constructing
  ``OTLPMetricsEmitter``.
* ``aperag/indexing/worker_factory.py`` —
  ``_build_collection_qdrant_connector`` shared helper +
  ``_build_vector_worker`` / ``_build_summary_worker`` /
  ``_build_qdrant_cleanup_backend`` reuse it; vision keeps its
  multimodal-gate-first shape.

Local gates:
* ``pytest tests/unit_test/indexing/ tests/integration/`` — 232
  passed / 48 skipped (no regression).
* ``ruff check aperag/ tests/integration/`` — clean.
* ``ruff format`` — applied.

Branch: ``chenyexuan/celery-wave5-p4`` rebased on
``bryce/celery-wave5`` HEAD ``4f0e9b0`` (P2 chunk 1 embed_image
API surface) → push to ``bryce/celery-wave5`` for Wave 5 single-PR
pile.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/app.py                                 | 23 ++++++
 .../service/document_service.py               |  4 +-
 aperag/indexing/graph_storage/postgres.py     | 38 +++++++--
 aperag/indexing/parse_orchestrator.py         | 52 ++++++++++++
 aperag/indexing/reconciler.py                 |  4 +-
 aperag/indexing/worker_factory.py             | 79 +++++++++++--------
 6 files changed, 159 insertions(+), 41 deletions(-)

diff --git a/aperag/app.py b/aperag/app.py
index 3b8e52dc6..3aa4ae5bf 100644
--- a/aperag/app.py
+++ b/aperag/app.py
@@ -269,6 +269,29 @@ async def combined_lifespan(app: FastAPI):
         # — otherwise queue-backlog / failure-rate alerts on the
         # collector side never receive data.
         if settings.indexing_metrics_emitter.lower() == "otlp":
+            # Wave 5 P5B: cross-check that the broader observability
+            # mode is also OTLP-shaped — operators that flip
+            # ``INDEXING_METRICS_EMITTER=otlp`` without configuring
+            # the parent ``APERAG_OBSERVABILITY_MODE`` end up with
+            # an :class:`OTLPMetricsEmitter` whose underlying
+            # ``MeterProvider`` was never installed by
+            # ``aperag.observability.metrics.init_metrics_provider``.
+            # The samples then no-op silently — the same operator-
+            # visible failure mode we explicitly avoided when
+            # making ``noop`` the default.
+            obs_mode = (settings.aperag_observability_mode or "").lower()
+            if obs_mode not in ("otlp", "collector"):
+                import logging as _logging
+
+                _logging.getLogger(__name__).warning(
+                    "INDEXING_METRICS_EMITTER=otlp but APERAG_OBSERVABILITY_MODE=%r "
+                    "(expected 'otlp' or 'collector') — the OTLP MeterProvider "
+                    "may not be installed, indexing SLI samples will silently "
+                    "no-op. Set APERAG_OBSERVABILITY_MODE=otlp to enable real "
+                    "OTLP export, OR revert INDEXING_METRICS_EMITTER=noop to "
+                    "make the gap explicit.",
+                    settings.aperag_observability_mode,
+                )
             metrics_emitter = OTLPMetricsEmitter()
         else:
             metrics_emitter = NoopMetricsEmitter()
diff --git a/aperag/domains/knowledge_base/service/document_service.py b/aperag/domains/knowledge_base/service/document_service.py
index 48e730949..427cbae63 100644
--- a/aperag/domains/knowledge_base/service/document_service.py
+++ b/aperag/domains/knowledge_base/service/document_service.py
@@ -189,12 +189,14 @@ async def _create_or_update_document_indexes(
         return
 
     parser_config = await _resolve_parser_config_for_collection(document.collection_id, session)
+    collection = await session.get(Collection, document.collection_id) if document.collection_id else None
+    from aperag.indexing.parse_orchestrator import resolve_tenant_scope_key
 
     payload = ParseDispatchPayload(
         document_id=document.id,
         collection_id=document.collection_id,
         object_path=object_path,
-        tenant_scope_key=f"user:{document.user}",
+        tenant_scope_key=resolve_tenant_scope_key(document=document, collection=collection),
         modalities=tuple(m.value for m in index_types),
         parser_config=parser_config,
         purge_existing_triples=True,
diff --git a/aperag/indexing/graph_storage/postgres.py b/aperag/indexing/graph_storage/postgres.py
index 9a71dc7d7..c7ecf7efb 100644
--- a/aperag/indexing/graph_storage/postgres.py
+++ b/aperag/indexing/graph_storage/postgres.py
@@ -77,7 +77,10 @@
     RelationRecord,
     RelationWithLineage,
 )
-from aperag.utils.utils import utc_now
+
+# utc_now is no longer needed: Wave 5 P5B switched ORM gmt_* columns
+# to ``server_default=CURRENT_TIMESTAMP`` mirroring the alembic
+# migration declaration.
 
 logger = logging.getLogger(__name__)
 
@@ -116,8 +119,23 @@ class _LineageEntityRow(_LineageGraphBase):
     type = Column(String(64), nullable=False)
     source_lineage = Column(JSONB, nullable=False, server_default=text("'[]'::jsonb"))
     description_parts = Column(JSONB, nullable=False, server_default=text("'[]'::jsonb"))
-    gmt_created = Column(DateTime(timezone=True), default=utc_now, nullable=False)
-    gmt_updated = Column(DateTime(timezone=True), default=utc_now, nullable=False)
+    # Wave 5 P5B: ORM uses the same ``server_default=CURRENT_TIMESTAMP``
+    # the alembic migration declares — strict ORM↔migration mirror so
+    # ``alembic check`` cannot drift on schema-touching follow-ups.
+    # Pre-Wave-5 ORM used ``default=utc_now`` Python-side; alembic
+    # check passed because Postgres treats both as semantic-equivalent
+    # but the per-mirror discipline is stronger when both layers
+    # speak the same dialect.
+    gmt_created = Column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=text("CURRENT_TIMESTAMP"),
+    )
+    gmt_updated = Column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=text("CURRENT_TIMESTAMP"),
+    )
 
 
 class _LineageRelationRow(_LineageGraphBase):
@@ -140,8 +158,18 @@ class _LineageRelationRow(_LineageGraphBase):
     # standalone ``description`` field so the column had no consumer.
     evidence_lineage = Column(JSONB, nullable=False, server_default=text("'[]'::jsonb"))
     description_parts = Column(JSONB, nullable=False, server_default=text("'[]'::jsonb"))
-    gmt_created = Column(DateTime(timezone=True), default=utc_now, nullable=False)
-    gmt_updated = Column(DateTime(timezone=True), default=utc_now, nullable=False)
+    # Wave 5 P5B: same ORM↔migration mirror discipline as
+    # ``_LineageEntityRow`` above.
+    gmt_created = Column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=text("CURRENT_TIMESTAMP"),
+    )
+    gmt_updated = Column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=text("CURRENT_TIMESTAMP"),
+    )
 
 
 # ---------------------------------------------------------------------
diff --git a/aperag/indexing/parse_orchestrator.py b/aperag/indexing/parse_orchestrator.py
index dc908b253..ca71dddc6 100644
--- a/aperag/indexing/parse_orchestrator.py
+++ b/aperag/indexing/parse_orchestrator.py
@@ -442,6 +442,57 @@ async def run_parse_worker(
     )
 
 
+def resolve_tenant_scope_key(*, document: Any, collection: Any = None) -> str:
+    """Wave 5 P5B forward-compat helper for tenant_scope_key construction.
+
+    Centralises the prefix logic so all enqueue producers (upload
+    handler, reconciler stuck-doc re-enqueue, future org-tenant
+    bootstrap) share the same source of truth. Pre-Wave-5 the prefix
+    was hard-coded as ``f"user:{document.user}"`` at every callsite —
+    this helper unifies the construction so a future Wave 6/7
+    organisation-tenant rollout flips one place.
+
+    Resolution order:
+
+    1. ``collection.config.tenant_scope_prefix == "org"`` AND
+       ``document.org_id`` (or ``collection.org_id``) populated →
+       ``f"org:<org_id>"``
+    2. Otherwise → ``f"user:<document.user>"`` (Wave 4 default,
+       per §H.2 forward-compat)
+
+    The org branch stays inert for Wave 5 because ``Document`` /
+    ``Collection`` schemas do not yet carry ``org_id``; the helper
+    just *makes the seam explicit* so a future PR landing the org
+    column can flip behaviour without grepping for hard-coded
+    ``user:`` prefixes.
+    """
+    org_prefix_active = False
+    org_id: str | None = None
+    if collection is not None:
+        config_obj = getattr(collection, "config", None)
+        if config_obj is not None:
+            cfg_dict: dict[str, Any] | None = None
+            if isinstance(config_obj, str):
+                try:
+                    cfg_dict = json.loads(config_obj)
+                except (TypeError, ValueError):
+                    cfg_dict = None
+            elif isinstance(config_obj, Mapping):
+                cfg_dict = dict(config_obj)
+            elif hasattr(config_obj, "tenant_scope_prefix"):
+                cfg_dict = {"tenant_scope_prefix": getattr(config_obj, "tenant_scope_prefix", None)}
+            if cfg_dict and cfg_dict.get("tenant_scope_prefix") == "org":
+                org_prefix_active = True
+        if not org_id:
+            org_id = getattr(collection, "org_id", None)
+    if not org_id:
+        org_id = getattr(document, "org_id", None)
+
+    if org_prefix_active and org_id:
+        return f"org:{org_id}"
+    return f"user:{getattr(document, 'user', '')}"
+
+
 __all__ = [
     "DEFAULT_PARSE_CONCURRENCY",
     "DEFAULT_POLL_TIMEOUT_SECONDS",
@@ -449,6 +500,7 @@ async def run_parse_worker(
     "ParseDispatchPayload",
     "ParseOrchestratorConfig",
     "process_one_parse_task",
+    "resolve_tenant_scope_key",
     "run_parse_worker",
     "run_parse_worker_loop",
 ]
diff --git a/aperag/indexing/reconciler.py b/aperag/indexing/reconciler.py
index 997abc8f0..9727b3c92 100644
--- a/aperag/indexing/reconciler.py
+++ b/aperag/indexing/reconciler.py
@@ -581,11 +581,13 @@ def _build_parse_payload_for_document(session: Session, document):
         )
         return None
 
+    from aperag.indexing.parse_orchestrator import resolve_tenant_scope_key
+
     return ParseDispatchPayload(
         document_id=document.id,
         collection_id=document.collection_id or "",
         object_path=str(object_path),
-        tenant_scope_key=f"user:{document.user}",
+        tenant_scope_key=resolve_tenant_scope_key(document=document, collection=collection),
         modalities=tuple(modalities),
         parser_config=parser_config,
         purge_existing_triples=True,
diff --git a/aperag/indexing/worker_factory.py b/aperag/indexing/worker_factory.py
index 8e36446d1..c1f76f54d 100644
--- a/aperag/indexing/worker_factory.py
+++ b/aperag/indexing/worker_factory.py
@@ -232,14 +232,9 @@ def _build_vector_worker(*, collection: Any, object_store: Any) -> ModalityWorke
     """Wire :class:`VectorModality` to a real Qdrant collection +
     real EmbeddingService for the collection's configured model.
     """
-    from aperag.config import get_vector_db_connector
     from aperag.indexing.vector import VectorModality
-    from aperag.llm.embed.base_embedding import get_collection_embedding_service_sync
-    from aperag.utils.utils import generate_vector_db_collection_name
 
-    embedding_service, vector_size = get_collection_embedding_service_sync(collection)
-    qdrant_collection = generate_vector_db_collection_name(collection.id)
-    adaptor = get_vector_db_connector(qdrant_collection, vector_size=vector_size)
+    adaptor, embedding_service, _ = _build_collection_qdrant_connector(collection)
     backend = _QdrantPointBackend(connector=adaptor.connector)
 
     def _embed(text: str) -> list[float]:
@@ -390,14 +385,9 @@ def _build_summary_worker(*, collection: Any, object_store: Any) -> ModalityWork
     model; the embedder is the same one vector uses (one model per
     collection, shared across modalities).
     """
-    from aperag.config import get_vector_db_connector
     from aperag.indexing.summary import SummaryModality
-    from aperag.llm.embed.base_embedding import get_collection_embedding_service_sync
-    from aperag.utils.utils import generate_vector_db_collection_name
 
-    embedding_service, vector_size = get_collection_embedding_service_sync(collection)
-    qdrant_collection = generate_vector_db_collection_name(collection.id)
-    adaptor = get_vector_db_connector(qdrant_collection, vector_size=vector_size)
+    adaptor, embedding_service, _ = _build_collection_qdrant_connector(collection)
     backend = _QdrantPointBackend(connector=adaptor.connector)
 
     summarizer = _build_collection_summarizer(collection)
@@ -444,6 +434,11 @@ def _build_vision_worker(*, collection: Any, object_store: Any) -> ModalityWorke
     from aperag.llm.embed.base_embedding import get_collection_embedding_service_sync
     from aperag.utils.utils import generate_vector_db_collection_name
 
+    # Vision keeps the multimodal gate ABOVE the connector wiring so a
+    # non-multimodal embedder fails fast without a network round-trip.
+    # The shared :func:`_build_collection_qdrant_connector` helper used
+    # by vector / summary calls the connector first; reusing it here
+    # would re-order the gate after the network call.
     embedding_service, vector_size = get_collection_embedding_service_sync(collection)
     if not embedding_service.is_multimodal():
         raise WorkerFactoryError(
@@ -452,7 +447,6 @@ def _build_vision_worker(*, collection: Any, object_store: Any) -> ModalityWorke
             "set collection.config.enable_vision=false until Wave 4 lands "
             "OR configure a multimodal embedding model on the collection's embedding spec"
         )
-
     qdrant_collection = generate_vector_db_collection_name(collection.id)
     adaptor = get_vector_db_connector(qdrant_collection, vector_size=vector_size)
     backend = _QdrantPointBackend(connector=adaptor.connector)
@@ -1033,18 +1027,23 @@ async def sync(
         raise NotImplementedError("CleanupWorkerView.sync must not be called — cleanup-only shape")
 
 
-def _build_qdrant_cleanup_backend(collection: Any) -> Any:
-    """Construct the Qdrant ``_backend`` adapter without any modality-
-    specific gate (used for cleanup of vector / summary / vision).
-
-    The full ``_build_vector_worker`` chain calls
-    :func:`get_collection_embedding_service_sync` to size the Qdrant
-    collection; we duplicate that minimal step here so a collection
-    whose embedder config is broken (a Wave 3 lesson #10 case) still
-    deletes its points instead of silently leaking. If the embedding
-    service cannot be resolved we still need ``vector_size`` to
-    address the right collection — fall back to the connector's
-    introspection of the existing collection.
+def _build_collection_qdrant_connector(collection: Any, *, allow_vector_size_fallback: bool = False) -> Any:
+    """Wave 5 P5B shared helper — resolves the per-collection Qdrant
+    connector for both dispatch (``_build_vector_worker`` /
+    ``_build_summary_worker`` / ``_build_vision_worker``) and cleanup
+    (``_build_qdrant_cleanup_backend``).
+
+    Pre-Wave-5 the dispatch chain and cleanup chain duplicated this
+    block; a future Qdrant adapter / embedder signature change had to
+    be applied twice. Centralising in one helper drops the drift risk
+    surface huangheng flagged on T2 ratify (msg=6aa8ca88 obs B).
+
+    ``allow_vector_size_fallback`` (cleanup-only) tolerates an
+    embedder resolve failure by reporting ``vector_size=0`` — the
+    connector still addresses the right collection by name and
+    ``delete_by_filter`` does not touch the vector dimension. Dispatch
+    callers pass ``False`` so an embedder fault surfaces as
+    :class:`WorkerFactoryError` immediately.
     """
     from aperag.config import get_vector_db_connector
     from aperag.llm.embed.base_embedding import get_collection_embedding_service_sync
@@ -1052,22 +1051,34 @@ def _build_qdrant_cleanup_backend(collection: Any) -> Any:
 
     qdrant_collection = generate_vector_db_collection_name(collection.id)
     try:
-        _, vector_size = get_collection_embedding_service_sync(collection)
-    except Exception as exc:  # noqa: BLE001
-        # The embedder is irrelevant for ``delete_by_filter``; the
-        # connector only needs ``vector_size`` to validate against
-        # the existing collection on the Qdrant side. Fall back to a
-        # benign size — the connector will still address the right
-        # collection by name and the delete-by-filter call does not
-        # touch the vector dimension.
+        embedding_service, vector_size = get_collection_embedding_service_sync(collection)
+    except Exception as exc:
+        if not allow_vector_size_fallback:
+            raise
         logger.warning(
-            "cleanup vector backend: embedder resolve failed for collection=%s (%s); "
+            "qdrant connector: embedder resolve failed for collection=%s (%s); "
             "falling back to size=0 for delete-only operations",
             getattr(collection, "id", "<unknown>"),
             exc,
         )
+        embedding_service = None
         vector_size = 0
     adaptor = get_vector_db_connector(qdrant_collection, vector_size=vector_size)
+    return adaptor, embedding_service, vector_size
+
+
+def _build_qdrant_cleanup_backend(collection: Any) -> Any:
+    """Construct the Qdrant ``_backend`` adapter for cleanup paths.
+
+    Reuses :func:`_build_collection_qdrant_connector` with the
+    cleanup-only ``allow_vector_size_fallback=True`` shape so a
+    collection whose embedder config is broken (a Wave 3 lesson #10
+    case) still has its points deleted instead of leaking.
+    """
+    adaptor, _embedder, _vector_size = _build_collection_qdrant_connector(
+        collection,
+        allow_vector_size_fallback=True,
+    )
     return _QdrantPointBackend(connector=adaptor.connector)
 
 

From e85c77bb65afb7162703d9c49bedcff13b4029a2 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:29:40 +0800
Subject: [PATCH 09/13] feat(celery Wave 5 P5A item 4): Neo4j label namespace
 prefix `aperag_`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per §K.9.1 P5A item 14 + design pack §K.9 Phase 5A item 4:
deployments that share a Neo4j instance with user-owned graphs would
collide on the generic `LineageEntity` / `LineageRelation` labels.
Bump them to `aperag_LineageEntity` / `aperag_LineageRelation` and
align constraint names (`aperag_lineage_entity_collection_name_unique`
/ `aperag_lineage_relation_collection_triple_unique`).

Hard-cut second round (no data migration): Wave 4 graph indexing
defaulted `enable_knowledge_graph=False`, so no production deployment
has data on the legacy unprefixed labels. Postgres / Nebula backends
are unaffected (table names already namespaced via `aperag_lineage_*`
schema in alembic migration `e7a3b9c2d1f6`).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/indexing/graph_storage/neo4j.py | 38 +++++++++++++++++---------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/aperag/indexing/graph_storage/neo4j.py b/aperag/indexing/graph_storage/neo4j.py
index a37cc5d26..bcc432fe6 100644
--- a/aperag/indexing/graph_storage/neo4j.py
+++ b/aperag/indexing/graph_storage/neo4j.py
@@ -25,13 +25,15 @@
 without requiring APOC.
 
 Storage layout — one row per entity / relation, modelled as a label
-``LineageEntity`` / ``LineageRelation`` node (relations are NOT modelled
-as edges here because the §D.3 Protocol does not require traversal —
-that is a separate retrieval concern owned by the legacy ``GraphStore``;
-keeping relations as their own labelled nodes mirrors the Postgres
-two-table schema exactly):
-
-    (:LineageEntity {
+``aperag_LineageEntity`` / ``aperag_LineageRelation`` node (relations
+are NOT modelled as edges here because the §D.3 Protocol does not
+require traversal — that is a separate retrieval concern owned by the
+legacy ``GraphStore``; keeping relations as their own labelled nodes
+mirrors the Postgres two-table schema exactly). The ``aperag_`` label
+prefix (Wave 5 P5A item 4) prevents collisions with user-owned graphs
+in shared Neo4j deployments.
+
+    (:aperag_LineageEntity {
         collection_id, name, type,
         source_lineage,                  -- list<string>, JSON-encoded LineageMember
         source_lineage_doc_ids,          -- list<string>, parallel index, dedup + strip-by-doc filter
@@ -42,7 +44,7 @@
         gmt_created, gmt_updated
     })
 
-    (:LineageRelation {
+    (:aperag_LineageRelation {
         collection_id, source, target, type,
         evidence_lineage,
         evidence_lineage_doc_ids,
@@ -117,11 +119,21 @@
 logger = logging.getLogger(__name__)
 
 
-_ENTITY_LABEL = "LineageEntity"
-_RELATION_LABEL = "LineageRelation"
-
-_ENTITY_CONSTRAINT_NAME = "lineage_entity_collection_name_unique"
-_RELATION_CONSTRAINT_NAME = "lineage_relation_collection_triple_unique"
+# Wave 5 P5A item 4 (per `feedback_production_readiness_invariant.md`
+# multi-tenant Neo4j hygiene): namespace the lineage labels with an
+# ``aperag_`` prefix so deployments that share a Neo4j instance with
+# user-owned graphs don't collide on a generic ``LineageEntity`` /
+# ``LineageRelation`` label. Constraint names get the same prefix so
+# ``ensure_schema`` is idempotent against fresh and prefixed-but-already-
+# created clusters alike. Wave 4 graph indexing was gated behind
+# ``enable_knowledge_graph=False`` by default so there is no production
+# data on the legacy unprefixed labels — no data migration needed for
+# the hard-cut second-round bump.
+_ENTITY_LABEL = "aperag_LineageEntity"
+_RELATION_LABEL = "aperag_LineageRelation"
+
+_ENTITY_CONSTRAINT_NAME = "aperag_lineage_entity_collection_name_unique"
+_RELATION_CONSTRAINT_NAME = "aperag_lineage_relation_collection_triple_unique"
 
 
 # ---------------------------------------------------------------------

From ba1cd0e3edef164771f3a134bbd9e6c1f8b390a6 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:31:12 +0800
Subject: [PATCH 10/13] docs(celery Wave 5 P5A close-out): items 2/3/5 deferred
 to Wave 6 with explicit reasoning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per Wave 5 P5A scope close-out 2026-04-27:

* Item 2 (chunk 4b _no_op_extractor identity check → attribute marker)
  is N/A: the placeholder was deleted in Wave 4 T1 (`19d3d70f`) when
  `build_collection_graph_extractor` replaced `_no_op_extractor`. There
  is no surviving identity-check site to harden — moot in current code.

* Item 3 (W5-perf-graph-lineage parallel-list O(N) cross-backend) is
  deferred to Wave 6: Postgres needs JSONB → text[] migration, Nebula
  needs tag re-model, plus alembic column-shape change. >10k
  docs/entity is not a Wave 5 acceptance criterion; trigger when
  observed in real deployments.

* Item 5 (Cypher type keyword rename `n.type` → `entity_type` /
  `relation_type`) is deferred to Wave 6: cross-backend rename
  (Postgres column + alembic; Cypher property rename; Nebula tag-prop
  rename) plus §D.3 Protocol-surface change (`EntityRecord.type` →
  `EntityRecord.entity_type` etc.). Cypher `TYPE()` is technically only
  for relationships so current code is unambiguous — forward-compat
  hygiene rather than correctness fix.

§K.9 Phase 5A acceptance lines amended to reflect ship status (items
1+4 shipped; 2 N/A; 3+5 → Wave 6). §K.10 Wave 6 backlog extended with
items 6+7 covering the deferred cross-backend rewrites.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../indexing-redesign-design-pack.md          | 31 +++++++++++++++----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/docs/modularization/indexing-redesign-design-pack.md b/docs/modularization/indexing-redesign-design-pack.md
index e3fe8e17c..9ae4a6604 100644
--- a/docs/modularization/indexing-redesign-design-pack.md
+++ b/docs/modularization/indexing-redesign-design-pack.md
@@ -1701,13 +1701,13 @@ per chunk 4d+4e ratify msg=c279a0ff + sweep D Layer 2 stub (`4d36c7fb`):
 
 5 commits in same phase batch (Bryce ownership per Wave 4 chunk 4 / T1 / T8 expertise):
 
-1. **T1 graph extractor per-collection config tunability** (per huangheng T1 obs A): `collection.config.knowledge_graph_config.{per_chunk_timeout, max_entities_per_chunk, max_relations_per_chunk}` per-collection override
-2. **chunk 4b `_no_op_extractor` identity check → attribute marker** (per huangheng chunk 4b obs B): `getattr(extractor, 'is_no_op', False)` more robust pattern
-3. **W5-perf-graph-lineage parallel-list O(N) alternative encoding** for high-cardinality entities (>10k docs/entity, per architect msg=39a74026)
-4. **W5-neo4j-label-namespace prefix `aperag_LineageEntity` / `aperag_LineageRelation`** to avoid user-namespace collision
-5. **W5-cypher-type-keyword rename `n.type` property** (Cypher `TYPE()` keyword shadow); cross-backend rename also in Postgres/Nebula
+1. **T1 graph extractor per-collection config tunability** (per huangheng T1 obs A): `collection.config.knowledge_graph_config.{per_chunk_timeout, max_entities_per_chunk, max_relations_per_chunk}` per-collection override — **shipped `f5e454ed`**
+2. ~~chunk 4b `_no_op_extractor` identity check → attribute marker~~ — **N/A: resolved by Wave 4 T1 land**. The placeholder was deleted when `build_collection_graph_extractor` replaced `_no_op_extractor` (see commit `19d3d70f`); there is no surviving identity-check site to harden, so this item is moot in current code.
+3. ~~W5-perf-graph-lineage parallel-list O(N) alternative encoding~~ — **deferred to Wave 6**. Cross-backend perf rewrite touches Postgres / Nebula schema (Postgres switching from JSONB-array-of-objects to parallel `text[]` columns; Nebula re-modelling tags) plus alembic migration. High-cardinality (>10k docs/entity) is not a Wave 5 acceptance criterion — defer to dedicated perf wave once observed in real deployments.
+4. **W5-neo4j-label-namespace prefix `aperag_LineageEntity` / `aperag_LineageRelation`** to avoid user-namespace collision — **shipped `42b5fa6a`** (Neo4j-only constant + comment rename, no schema migration needed since Wave 4 default-gated graph indexing meant no production data on legacy labels).
+5. ~~W5-cypher-type-keyword rename `n.type` property~~ — **deferred to Wave 6**. Although `TYPE(n)` is a Cypher built-in for relationships not nodes (so node-property `n.type` is technically unambiguous), a forward-compat rename across all 3 backends (Postgres column rename + alembic migration + Cypher property rename + Nebula tag-prop rename + `EntityRecord.type` / `RelationRecord.type` Protocol surface) is non-trivial. Keep current naming until Wave 6 dedicated cross-backend rename batch.
 
-**fully-resolves**: §K.8 Wave 5 backlog items 5 + 12 (其中 12 fold 进 PR-A T7 multimodal 实施 if applicable)
+**fully-resolves**: §K.8 P5A items 1 + 4 (items 2 / 3 / 5 redirected as noted)
 
 #### Phase 5B (post-Phase-4, chenyexuan batch) — P2 batch infra + cleanup polish
 
@@ -1802,6 +1802,25 @@ PM (燧木) 决定。架构师建议参考 D10 模式：
    - ``tests/integration/compat/test_graph_compat.py``
 5. **Final grep-zero verify**: post-Wave-6, ``aperag/`` 全树无 ``from aperag.domains.knowledge_graph.graphindex`` import
 
+**Cross-backend lineage polish folded from Wave 5 P5A** (per Wave 5 P5A close-out
+2026-04-27):
+
+6. **W5-perf-graph-lineage parallel-list O(N) cross-backend** — Postgres
+   switches the JSONB-array-of-objects to parallel ``text[]`` columns
+   (matching Neo4j parallel-list encoding); Nebula re-models tags to
+   parallel-list semantics; alembic migration for the column-shape
+   change. Trigger when a deployment observes >10k docs/entity (per
+   architect msg=39a74026 high-cardinality threshold).
+7. **W5-cypher-type-keyword cross-backend rename** — rename
+   ``EntityRecord.type`` / ``RelationRecord.type`` (and
+   ``EntityWithLineage`` / ``RelationWithLineage``) to ``entity_type`` /
+   ``relation_type`` across the §D.3 Protocol; Postgres column rename via
+   alembic; Cypher ``n.type`` / ``r.type`` rewrite; Nebula tag-prop
+   rename. Forward-compat hygiene (Cypher ``TYPE(r)`` is for relationships
+   only so current code is technically unambiguous, but Protocol-surface
+   rename frees future use of ``TYPE`` as a true Cypher keyword without
+   ambiguity).
+
 **production-readiness 三类 layer**:
 - must-be-real: 新 LightRAG-style query layer ships behavior-equivalent retrieval (existing graph search results within ε tolerance)
 - may-be-gated: 无 (full hard-cut)

From f15e504eb9ec350cceea1c9e56b1ea36922018f8 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:46:46 +0800
Subject: [PATCH 11/13] =?UTF-8?q?feat(celery=20Wave=205=20P2=20chunk=202):?=
 =?UTF-8?q?=20parser=20image=20extraction=20=E2=86=92=20derived/parse=5F<v?=
 =?UTF-8?q?>/vision/{images,source.jsonl}?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per §G.2.5.1 spec amend item 2: when DocParser produces `AssetBinPart`
payloads (PDF page images, single-image-input passthrough, data-URI
extracted images), the parser writes each image blob to
`derived/parse_<v>/vision/images/<image_id>.<ext>` and lands a
`vision/source.jsonl` descriptor enumerating them. The vision worker
(chunk 4) consumes the descriptor instead of the T1 simulator's
synthetic `images.json` companion.

`aperag/indexing/parser.py`:
* `_docparser_extract_markdown` now returns
  `tuple[str, list[_VisionImageAsset]]` — markdown body unchanged plus
  the extracted image assets list. Non-image `AssetBinPart`s (audio /
  PDF data) drop. Duplicate `asset_id`s deduplicated to keep
  Qdrant-point-id stable.
* `_VisionImageAsset` carrier holds (`image_id`, `data`, `mime_type`,
  `alt_text`, `page_idx`, `bbox`).
* `_vision_image_extension(mime_type)` maps known image MIMEs to a
  filename extension; falls back to `.bin` for unknown types since
  vision worker uses `imghdr` on bytes at embed time.
* `_write_vision_assets` persists each blob via `write_atomic` and
  writes the JSONL descriptor.
* `ParseResult.vision_source_path` and `vision_image_count` are new
  optional fields (defaults `""` / `0`) so pre-Wave-5 callers see no
  behaviour change. Image-only inputs (no markdown emitted) still land
  their assets so vision modality has bytes to embed.

`tests/unit_test/indexing/test_parser_image_extraction.py`: 9 unit tests
covering MIME → extension mapping, descriptor schema, simulator-path
no-op, image-only input asset persistence, unknown-MIME `.bin`
fallback, and the new `ParseResult` fields. Tests monkeypatch
`_docparser_extract_markdown` to avoid pulling MinerU / MarkItDown
into the unit-test path.

Production-readiness 三类:
- must-be-real: real `AssetBinPart` extraction wired through DocParser
- may-be-gated: descriptor + image artefacts only land when DocParser
  surfaces `AssetBinPart`s (image-less docs see no vision/ writes)
- fully-resolves: §G.2.5.1 spec item 2 (parser image extraction) +
  Wave 5 P2 chunk 2 acceptance

The vision worker callsite rewrite (chunk 4) and provider v3 multimodal
flag UI (chunk 3) remain. Defaults preserve existing behaviour for
text-only callers.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/indexing/parser.py                     | 198 +++++++++++-
 .../indexing/test_parser_image_extraction.py  | 291 ++++++++++++++++++
 2 files changed, 473 insertions(+), 16 deletions(-)
 create mode 100644 tests/unit_test/indexing/test_parser_image_extraction.py

diff --git a/aperag/indexing/parser.py b/aperag/indexing/parser.py
index f62d8edac..b1648c19c 100644
--- a/aperag/indexing/parser.py
+++ b/aperag/indexing/parser.py
@@ -166,15 +166,28 @@ class ParseResult:
     """Outcome of :func:`parse_document`.
 
     ``parse_version`` pins the (parser, content, chunking) triple so
-    callers can persist it on the ``DocumentIndex`` row. The three
-    artifact paths are the canonical names downstream modalities
-    expect to find under ``derived/parse_<version>/``.
+    callers can persist it on the ``DocumentIndex`` row. The artifact
+    paths are the canonical names downstream modalities expect to find
+    under ``derived/parse_<version>/``.
+
+    Wave 5 P2 chunk 2 (per §G.2.5.1 spec amend item 2): when DocParser
+    produces ``AssetBinPart`` payloads (PDF page images / single-image
+    inputs / data-URI extracted images), the parser writes each blob to
+    ``derived/parse_<v>/vision/images/<image_id>.<ext>`` and lands a
+    ``vision/source.jsonl`` descriptor enumerating them. The vision
+    worker consumes the descriptor (chunk 4 callsite rewrite) instead
+    of the T1 simulator's synthetic ``images.json`` companion. The
+    descriptor path is empty when the parsed document has no image
+    assets — vision modality short-circuits to the no-image FAILED
+    handling already in place.
     """
 
     parse_version: str
     markdown_path: str
     outline_path: str
     chunks_path: str
+    vision_source_path: str = ""
+    vision_image_count: int = 0
 
 
 # ---------------------------------------------------------------------
@@ -352,28 +365,85 @@ def _all_artifacts_present(
         return False
 
 
+# ---------------------------------------------------------------------
+# Vision asset extraction helpers — Wave 5 P2 chunk 2
+# ---------------------------------------------------------------------
+
+
+# MIME-type → file extension lookup. Provider-specific extras (HEIC,
+# AVIF, etc.) drop through to the generic ``.bin`` fallback rather
+# than rejecting the asset — the vision worker uses ``imghdr`` on the
+# image bytes themselves at embed time, so the on-disk filename
+# extension is informational only.
+_MIME_EXTENSION_MAP: dict[str, str] = {
+    "image/jpeg": "jpg",
+    "image/jpg": "jpg",
+    "image/png": "png",
+    "image/webp": "webp",
+    "image/gif": "gif",
+    "image/bmp": "bmp",
+    "image/tiff": "tiff",
+    "image/svg+xml": "svg",
+}
+
+
+def _vision_image_extension(mime_type: str | None) -> str:
+    if not mime_type:
+        return "bin"
+    key = mime_type.split(";", 1)[0].strip().lower()
+    return _MIME_EXTENSION_MAP.get(key, "bin")
+
+
+@dataclass(frozen=True)
+class _VisionImageAsset:
+    """Internal carrier for a single extracted image asset.
+
+    Holds enough to land both the blob (under ``vision/images/``) and
+    its row in the ``vision/source.jsonl`` descriptor consumed by the
+    vision worker (chunk 4). ``image_id`` is the canonical ``asset_id``
+    DocParser already computes (md5 of the image bytes), so two parses
+    of the same content produce the same image_id and downstream
+    identity (``vision:<doc_id>:<parse_v>:<image_id>`` Qdrant point id)
+    stays stable across retries.
+    """
+
+    image_id: str
+    data: bytes
+    mime_type: str
+    alt_text: str
+    page_idx: int | None
+    bbox: list[float] | None
+
+
 def _docparser_extract_markdown(
     *,
     source_bytes: bytes,
     extension: str,
     parser_config: dict[str, Any] | None,
-) -> str:
-    """Run :class:`DocParser` on ``source_bytes`` and return concatenated markdown.
+) -> tuple[str, list[_VisionImageAsset]]:
+    """Run :class:`DocParser` on ``source_bytes`` and return both
+    concatenated markdown AND the extracted vision image assets.
+
+    Wave 5 P2 chunk 2 (per §G.2.5.1 item 2): the assets list carries
+    every :class:`AssetBinPart` whose ``mime_type`` is a recognised
+    image type. Audio / video / PDF-data assets are dropped — only
+    images participate in the vision modality. The caller writes the
+    blobs out under ``derived/parse_<v>/vision/images/`` plus a
+    descriptor JSONL line for each.
 
     Materialises the bytes into a tempfile (DocParser only accepts a
     real path on disk because MarkItDown / MinerU / OCR all stream
     from disk), runs the parser chain, then collects every
-    :class:`MarkdownPart` body in order. Non-markdown parts (assets /
-    images / pdf data) are not used here — they belong to the
-    vision modality's ``derive`` step (T1.4) and the cleanup loop's
-    asset GC (T2.1), not the shared markdown contract.
+    :class:`MarkdownPart` body in order. Non-image asset parts (PDF
+    data, audio, etc.) belong to other modalities or to the cleanup
+    loop's asset GC (T2.1), not the shared markdown contract.
 
     DocParser is imported lazily so the indexing package's __init__
     does not pull MarkItDown / MinerU / pikepdf at import time
     (matches the existing T1.1 lazy-import discipline that kept the
     Wave 3 hard-cut circular-import-free).
     """
-    from aperag.docparser.base import MarkdownPart
+    from aperag.docparser.base import AssetBinPart, MarkdownPart
     from aperag.docparser.doc_parser import DocParser
 
     # Use the suffix the caller already normalised so the temp filename
@@ -402,14 +472,44 @@ def _docparser_extract_markdown(
             os.unlink(tmp_path)
 
     markdown_parts = [p.markdown for p in parts if isinstance(p, MarkdownPart) and p.markdown]
+    seen_image_ids: set[str] = set()
+    image_assets: list[_VisionImageAsset] = []
+    for part in parts:
+        if not isinstance(part, AssetBinPart):
+            continue
+        mime = (part.mime_type or "").lower()
+        if not mime.startswith("image/"):
+            continue
+        if not part.data:
+            continue
+        if part.asset_id in seen_image_ids:
+            # Same image referenced twice in the document → keep the
+            # first record + drop the duplicate so the descriptor has a
+            # single canonical row per image_id. The Qdrant point id
+            # would have collided otherwise.
+            continue
+        seen_image_ids.add(part.asset_id)
+        metadata = part.metadata or {}
+        image_assets.append(
+            _VisionImageAsset(
+                image_id=part.asset_id,
+                data=part.data,
+                mime_type=mime,
+                alt_text=str(metadata.get("alt_text") or part.content or ""),
+                page_idx=metadata.get("page_idx") if isinstance(metadata.get("page_idx"), int) else None,
+                bbox=metadata.get("bbox") if isinstance(metadata.get("bbox"), list) else None,
+            )
+        )
+
     if not markdown_parts:
         # Image-only / audio-only inputs (no MarkdownPart) currently
         # have nothing for the outline + chunks pipeline to emit; we
         # return an empty body so the artifacts exist but downstream
-        # vector / fulltext modalities see zero chunks. Vision modality
-        # consumes the original asset directly in its derive step.
-        return ""
-    return "\n\n".join(markdown_parts)
+        # vector / fulltext modalities see zero chunks. Image-only
+        # uploads still land their assets via the descriptor below so
+        # the vision modality has bytes to embed.
+        return ("", image_assets)
+    return ("\n\n".join(markdown_parts), image_assets)
 
 
 def parse_document(
@@ -525,6 +625,7 @@ def parse_document(
             chunks_path=chunks_path,
         )
 
+    image_assets: list[_VisionImageAsset] = []
     if extension is None or extension in _SIMULATOR_EXTENSIONS:
         try:
             markdown = source_bytes.decode("utf-8")
@@ -540,7 +641,7 @@ def parse_document(
                 f"with the real extension to dispatch to DocParser"
             ) from exc
     else:
-        markdown = _docparser_extract_markdown(
+        markdown, image_assets = _docparser_extract_markdown(
             source_bytes=source_bytes,
             extension=extension,
             parser_config=parser_config,
@@ -561,14 +662,25 @@ def parse_document(
         ("\n".join(json.dumps(c, ensure_ascii=False) for c in chunks) + "\n").encode("utf-8"),
     )
 
+    vision_source_path = ""
+    if image_assets:
+        vision_source_path = _write_vision_assets(
+            store=store,
+            collection_id=collection_id,
+            document_id=document_id,
+            parse_version=parse_version,
+            assets=image_assets,
+        )
+
     logger.info(
         "indexing parser produced derived artifacts collection=%s document=%s "
-        "parse_version=%s outline_size=%d chunk_count=%d",
+        "parse_version=%s outline_size=%d chunk_count=%d vision_image_count=%d",
         collection_id,
         document_id,
         parse_version,
         len(outline),
         len(chunks),
+        len(image_assets),
     )
 
     return ParseResult(
@@ -576,7 +688,61 @@ def parse_document(
         markdown_path=markdown_path,
         outline_path=outline_path,
         chunks_path=chunks_path,
+        vision_source_path=vision_source_path,
+        vision_image_count=len(image_assets),
+    )
+
+
+def _write_vision_assets(
+    *,
+    store: _SyncObjectStore,
+    collection_id: str,
+    document_id: str,
+    parse_version: str,
+    assets: list[_VisionImageAsset],
+) -> str:
+    """Persist extracted image bytes + descriptor under
+    ``derived/parse_<v>/vision/``.
+
+    Each asset is written to ``vision/images/<image_id>.<ext>`` and
+    enumerated in a ``vision/source.jsonl`` descriptor (one record per
+    line, schema ``{image_id, image_path, mime_type, alt_text,
+    page_idx, bbox}``). Returns the descriptor path so the caller can
+    pin it on :class:`ParseResult` and the orchestrator can hand it to
+    the vision worker (chunk 4 callsite rewrite).
+    """
+    descriptor_lines: list[str] = []
+    for asset in assets:
+        ext = _vision_image_extension(asset.mime_type)
+        image_path = derived_artifact(
+            collection_id=collection_id,
+            document_id=document_id,
+            parse_version=parse_version,
+            filename=f"vision/images/{asset.image_id}.{ext}",
+        )
+        write_atomic(store, image_path, asset.data)
+        descriptor_lines.append(
+            json.dumps(
+                {
+                    "image_id": asset.image_id,
+                    "image_path": image_path,
+                    "mime_type": asset.mime_type,
+                    "alt_text": asset.alt_text,
+                    "page_idx": asset.page_idx,
+                    "bbox": asset.bbox,
+                },
+                ensure_ascii=False,
+            )
+        )
+
+    descriptor_path = derived_artifact(
+        collection_id=collection_id,
+        document_id=document_id,
+        parse_version=parse_version,
+        filename="vision/source.jsonl",
     )
+    write_atomic(store, descriptor_path, ("\n".join(descriptor_lines) + "\n").encode("utf-8"))
+    return descriptor_path
 
 
 def read_chunks(store: _SyncObjectStore, chunks_path: str) -> list[dict[str, Any]]:
diff --git a/tests/unit_test/indexing/test_parser_image_extraction.py b/tests/unit_test/indexing/test_parser_image_extraction.py
new file mode 100644
index 000000000..61d2563ae
--- /dev/null
+++ b/tests/unit_test/indexing/test_parser_image_extraction.py
@@ -0,0 +1,291 @@
+# Copyright 2025 ApeCloud, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for Wave 5 P2 chunk 2 — parser image extraction.
+
+Pin the §G.2.5.1 spec item 2 contract: when DocParser produces
+``AssetBinPart`` payloads, the parser writes each image blob to
+``derived/parse_<v>/vision/images/<image_id>.<ext>`` and lands a
+``vision/source.jsonl`` descriptor. The vision worker (chunk 4)
+consumes the descriptor instead of the T1 simulator's synthetic
+``images.json`` companion.
+
+We don't exercise the full DocParser chain here (that needs MinerU /
+MarkItDown deps); instead we monkeypatch
+:func:`aperag.indexing.parser._docparser_extract_markdown` to return a
+controlled ``(markdown, image_assets)`` tuple, then assert
+:func:`parse_document` writes the right files at the right paths and
+the right :class:`ParseResult` fields.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from aperag.indexing import parser as parser_module
+from aperag.indexing.object_store import InMemoryObjectStore, derived_dir
+from aperag.indexing.parser import (
+    _vision_image_extension,
+    _VisionImageAsset,
+    parse_document,
+)
+
+
+def test_vision_image_extension_maps_known_mime_types():
+    assert _vision_image_extension("image/jpeg") == "jpg"
+    assert _vision_image_extension("image/jpg") == "jpg"
+    assert _vision_image_extension("image/png") == "png"
+    assert _vision_image_extension("image/webp") == "webp"
+    assert _vision_image_extension("image/gif") == "gif"
+    assert _vision_image_extension("image/svg+xml") == "svg"
+
+
+def test_vision_image_extension_strips_charset_param():
+    """``image/jpeg; charset=binary`` and similar provider-emitted
+    MIME parameters should not break the extension lookup."""
+    assert _vision_image_extension("image/jpeg; charset=binary") == "jpg"
+    assert _vision_image_extension("IMAGE/PNG") == "png"
+
+
+def test_vision_image_extension_falls_back_to_bin_for_unknown():
+    assert _vision_image_extension(None) == "bin"
+    assert _vision_image_extension("") == "bin"
+    assert _vision_image_extension("application/octet-stream") == "bin"
+    # Newer formats not yet in the lookup table — vision worker will
+    # use ``imghdr`` on the bytes themselves at embed time, so the
+    # filename is informational only.
+    assert _vision_image_extension("image/heic") == "bin"
+
+
+def test_parse_document_persists_image_blobs_and_descriptor(monkeypatch: pytest.MonkeyPatch):
+    """End-to-end: parser hands DocParser a PDF-shape input, gets back
+    markdown + 2 image assets; parser persists each blob at
+    ``vision/images/<image_id>.<ext>`` and a descriptor at
+    ``vision/source.jsonl``."""
+
+    img_a = _VisionImageAsset(
+        image_id="aaa111",
+        data=b"\xff\xd8\xff\xe0fake-jpeg-bytes",
+        mime_type="image/jpeg",
+        alt_text="Figure 1: architecture diagram",
+        page_idx=2,
+        bbox=[0.1, 0.2, 0.3, 0.4],
+    )
+    img_b = _VisionImageAsset(
+        image_id="bbb222",
+        data=b"\x89PNGfake-png-bytes",
+        mime_type="image/png",
+        alt_text="",
+        page_idx=None,
+        bbox=None,
+    )
+
+    def _fake_docparser(*, source_bytes: bytes, extension: str, parser_config):
+        return ("# Title\n\nbody", [img_a, img_b])
+
+    monkeypatch.setattr(parser_module, "_docparser_extract_markdown", _fake_docparser)
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-1",
+        source_bytes=b"%PDF-1.4 fake",
+        source_filename="report.pdf",
+    )
+
+    assert result.vision_image_count == 2
+    expected_dir = derived_dir("col-1", "doc-1", result.parse_version)
+    expected_descriptor = f"{expected_dir}/vision/source.jsonl"
+    assert result.vision_source_path == expected_descriptor
+    assert store.obj_exists(expected_descriptor)
+
+    image_a_path = f"{expected_dir}/vision/images/aaa111.jpg"
+    image_b_path = f"{expected_dir}/vision/images/bbb222.png"
+    assert store.obj_exists(image_a_path)
+    assert store.obj_exists(image_b_path)
+    assert store.get(image_a_path).read() == img_a.data
+    assert store.get(image_b_path).read() == img_b.data
+
+    descriptor_body = store.get(expected_descriptor).read().decode("utf-8")
+    rows = [json.loads(line) for line in descriptor_body.splitlines() if line.strip()]
+    assert len(rows) == 2
+    assert rows[0] == {
+        "image_id": "aaa111",
+        "image_path": image_a_path,
+        "mime_type": "image/jpeg",
+        "alt_text": "Figure 1: architecture diagram",
+        "page_idx": 2,
+        "bbox": [0.1, 0.2, 0.3, 0.4],
+    }
+    assert rows[1] == {
+        "image_id": "bbb222",
+        "image_path": image_b_path,
+        "mime_type": "image/png",
+        "alt_text": "",
+        "page_idx": None,
+        "bbox": None,
+    }
+
+
+def test_parse_document_skips_vision_artefacts_when_no_images(monkeypatch: pytest.MonkeyPatch):
+    """A parsed document with no image assets should not land any
+    ``vision/`` artefacts and ``vision_source_path`` must stay empty
+    so the orchestrator's vision dispatch can short-circuit cleanly.
+    """
+
+    def _fake_docparser(*, source_bytes: bytes, extension: str, parser_config):
+        return ("# Title\n\nbody", [])
+
+    monkeypatch.setattr(parser_module, "_docparser_extract_markdown", _fake_docparser)
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-no-images",
+        source_bytes=b"fake doc",
+        source_filename="doc.docx",
+    )
+
+    assert result.vision_image_count == 0
+    assert result.vision_source_path == ""
+    expected_dir = derived_dir("col-1", "doc-no-images", result.parse_version)
+    # No vision assets should have been written.
+    assert not store.obj_exists(f"{expected_dir}/vision/source.jsonl")
+
+
+def test_parse_document_simulator_path_emits_no_vision_artefacts():
+    """The text-only simulator path (markdown / txt) does not invoke
+    DocParser so no ``AssetBinPart`` extraction happens — the
+    ``ParseResult`` defaults must keep vision fields empty so existing
+    pre-Wave-5 callers see no behaviour change."""
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-md",
+        source_bytes=b"# Header\n\nBody.",
+        source_filename="notes.md",
+    )
+
+    assert result.vision_image_count == 0
+    assert result.vision_source_path == ""
+    expected_dir = derived_dir("col-1", "doc-md", result.parse_version)
+    assert not store.obj_exists(f"{expected_dir}/vision/source.jsonl")
+
+
+def test_parse_document_image_paths_use_mime_extension(monkeypatch: pytest.MonkeyPatch):
+    """The on-disk filename extension follows the asset's ``mime_type``
+    so operators can spot-check the bytes directly without renaming."""
+
+    img = _VisionImageAsset(
+        image_id="ccc333",
+        data=b"GIF89a-fake",
+        mime_type="image/gif",
+        alt_text="",
+        page_idx=None,
+        bbox=None,
+    )
+
+    def _fake_docparser(*, source_bytes: bytes, extension: str, parser_config):
+        return ("# t", [img])
+
+    monkeypatch.setattr(parser_module, "_docparser_extract_markdown", _fake_docparser)
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-gif",
+        source_bytes=b"fake",
+        source_filename="anim.gif",
+    )
+
+    expected_dir = derived_dir("col-1", "doc-gif", result.parse_version)
+    assert store.obj_exists(f"{expected_dir}/vision/images/ccc333.gif")
+
+
+def test_parse_document_falls_back_to_bin_for_unknown_mime(monkeypatch: pytest.MonkeyPatch):
+    """Unknown MIMEs (e.g., ``image/heic``) fall back to ``.bin`` so
+    the asset is still persisted — vision worker uses ``imghdr`` on
+    the bytes at embed time and tolerates the generic extension."""
+
+    img = _VisionImageAsset(
+        image_id="ddd444",
+        data=b"heic-fake",
+        mime_type="image/heic",
+        alt_text="",
+        page_idx=None,
+        bbox=None,
+    )
+
+    def _fake_docparser(*, source_bytes: bytes, extension: str, parser_config):
+        return ("# t", [img])
+
+    monkeypatch.setattr(parser_module, "_docparser_extract_markdown", _fake_docparser)
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-heic",
+        source_bytes=b"fake",
+        source_filename="phone.heic",
+    )
+
+    expected_dir = derived_dir("col-1", "doc-heic", result.parse_version)
+    assert store.obj_exists(f"{expected_dir}/vision/images/ddd444.bin")
+
+
+def test_parse_document_image_only_input_still_lands_descriptor(monkeypatch: pytest.MonkeyPatch):
+    """An image-only input (e.g., a single PNG upload) produces no
+    markdown but should still land its image asset + descriptor so
+    the vision modality has bytes to embed. ``markdown.md`` is empty
+    on this path; ``vision_source_path`` is populated."""
+
+    img = _VisionImageAsset(
+        image_id="eee555",
+        data=b"single-png-bytes",
+        mime_type="image/png",
+        alt_text="cover photo",
+        page_idx=None,
+        bbox=None,
+    )
+
+    def _fake_docparser(*, source_bytes: bytes, extension: str, parser_config):
+        # No markdown emitted (image-only input) but one asset landed.
+        return ("", [img])
+
+    monkeypatch.setattr(parser_module, "_docparser_extract_markdown", _fake_docparser)
+
+    store = InMemoryObjectStore()
+    result = parse_document(
+        store=store,
+        collection_id="col-1",
+        document_id="doc-single-img",
+        source_bytes=b"png-bytes",
+        source_filename="cover.png",
+    )
+
+    assert result.vision_image_count == 1
+    expected_dir = derived_dir("col-1", "doc-single-img", result.parse_version)
+    assert store.obj_exists(f"{expected_dir}/vision/images/eee555.png")
+    assert store.obj_exists(f"{expected_dir}/vision/source.jsonl")
+    # Markdown still written (empty) so the artifact contract holds.
+    assert store.obj_exists(result.markdown_path)
+    assert store.get(result.markdown_path).read() == b""

From 7fe3f25d457b0d986de75be63d4a69e0b6baef80 Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 18:52:41 +0800
Subject: [PATCH 12/13] feat(celery Wave 5 P2 chunk 3):
 Model.supports_multimodal_embedding capability flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per §G.2.5.1 spec amend item 3: surface a typed capability flag for
embedding models that accept image bytes (CLIP / Voyage Multimodal /
Jina v3 / OpenAI multimodal embeddings) so operators can register
them via the v3 model platform UI. The flag drives the chunk 4b
vision gate's `EmbeddingService.is_multimodal()` runtime check —
flip it on the collection's embedder spec model and the gate
self-disables, enabling vision modality.

Distinct from existing `supports_vision` (chat/completion models
that accept image input) — `supports_multimodal_embedding` describes
embedding models that produce vectors from images.

Changes:
* `aperag/domains/model_platform/schemas.py`: new optional
  `supports_multimodal_embedding: bool = False` on `Model` /
  `ModelCreate` / `ModelUpdate` (default False, no behaviour change
  for existing rows).
* `aperag/domains/model_platform/db/models.py`: new
  `supports_multimodal_embedding` Boolean column with default False.
* `aperag/migration/versions/...f1c8d2a5b6e3...`: alembic migration
  adding the column with `server_default=false`. Forward-only
  cutover safe because pre-Wave-5 rows default to False (matches
  prior implicit behaviour).
* `aperag/domains/model_platform/service/model_service.py`:
  `_model_to_schema` maps the new column.
* `aperag/db/repositories/llm_provider.py`: `create_model` accepts
  the new field so the v3 `POST /models` flow persists it.
* `aperag/llm/embed/base_embedding.py`: `get_embedding_service`
  prefers the typed `supports_multimodal_embedding` column; falls
  back to the legacy `runner_config["multimodal"]` JSON entry so
  pre-Wave-5 operators who edited the JSON keep working without
  re-saving the row.

Tests: 3 new unit tests in `test_model_platform_v3_contract.py`
covering default-False behaviour, opt-in True path, and v3 OpenAPI
schema exposure on Model / ModelCreate / ModelUpdate.

Production-readiness 三类:
- must-be-real: real DB column + alembic migration + v3 API surface
- may-be-gated: legacy `runner_config["multimodal"]` fallback for
  rows created before the typed column landed
- fully-resolves: §G.2.5.1 spec item 3 (provider v3 multimodal
  capability flag UI) + Wave 5 P2 chunk 3 acceptance

Phase 2 chunk 4 (callsite rewrite + chunk 4b gate self-disable
verify + Layer 1/2 test rename) remains as the final Wave 5 blocker.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/db/repositories/llm_provider.py        |  2 +
 aperag/domains/model_platform/db/models.py    |  5 +++
 aperag/domains/model_platform/schemas.py      | 11 +++++
 .../model_platform/service/model_service.py   |  1 +
 aperag/llm/embed/base_embedding.py            | 11 ++++-
 ...6e3_model_supports_multimodal_embedding.py | 41 ++++++++++++++++++
 .../test_model_platform_v3_contract.py        | 43 +++++++++++++++++++
 7 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 aperag/migration/versions/20260427030000-f1c8d2a5b6e3_model_supports_multimodal_embedding.py

diff --git a/aperag/db/repositories/llm_provider.py b/aperag/db/repositories/llm_provider.py
index dbe2a4a7e..00c15c871 100644
--- a/aperag/db/repositories/llm_provider.py
+++ b/aperag/db/repositories/llm_provider.py
@@ -286,6 +286,7 @@ async def create_model(
         embedding_dimensions: int | None = None,
         supports_vision: bool = False,
         supports_tool_calling: bool = False,
+        supports_multimodal_embedding: bool = False,
         extra: dict | None = None,
     ) -> Model:
         async def _operation(session):
@@ -302,6 +303,7 @@ async def _operation(session):
                 embedding_dimensions=embedding_dimensions,
                 supports_vision=supports_vision,
                 supports_tool_calling=supports_tool_calling,
+                supports_multimodal_embedding=supports_multimodal_embedding,
                 extra=extra or {},
             )
             session.add(model)
diff --git a/aperag/domains/model_platform/db/models.py b/aperag/domains/model_platform/db/models.py
index 450c4c259..6aea57651 100644
--- a/aperag/domains/model_platform/db/models.py
+++ b/aperag/domains/model_platform/db/models.py
@@ -112,6 +112,11 @@ class Model(Base):
     embedding_dimensions = Column(Integer, nullable=True)
     supports_vision = Column(Boolean, default=False, nullable=False)
     supports_tool_calling = Column(Boolean, default=False, nullable=False)
+    # Wave 5 P2 chunk 3: typed capability flag for embedding models
+    # that accept image bytes; surfaced through the v3 model API +
+    # consumed by ``base_embedding.get_embedding_service`` to set
+    # ``EmbeddingService.multimodal=True``.
+    supports_multimodal_embedding = Column(Boolean, default=False, nullable=False)
     status = Column(_enum_column(ModelStatus), default=ModelStatus.ACTIVE.value, nullable=False, index=True)
     extra = Column(JSON, default=lambda: {}, nullable=False)
     gmt_created = Column(DateTime(timezone=True), default=utc_now, nullable=False)
diff --git a/aperag/domains/model_platform/schemas.py b/aperag/domains/model_platform/schemas.py
index 62251717c..1a11aecb0 100644
--- a/aperag/domains/model_platform/schemas.py
+++ b/aperag/domains/model_platform/schemas.py
@@ -106,6 +106,15 @@ class Model(BaseModel):
     embedding_dimensions: Optional[int] = None
     supports_vision: bool = False
     supports_tool_calling: bool = False
+    # Wave 5 P2 chunk 3 (per §G.2.5.1 spec amend item 3): a typed
+    # capability flag for embedding models that accept image bytes
+    # (CLIP / Voyage Multimodal / Jina v3 / OpenAI multimodal
+    # embeddings / etc.) and produce a single vector. Distinct from
+    # ``supports_vision`` which describes chat/completion models that
+    # accept images as input. Drives the chunk 4b vision gate's
+    # ``EmbeddingService.is_multimodal()`` runtime check — flip on the
+    # collection's embedder spec model and the gate self-disables.
+    supports_multimodal_embedding: bool = False
     status: Literal["ACTIVE", "INACTIVE"] = "ACTIVE"
     extra: dict[str, Any] = Field(default_factory=dict)
     created: Optional[datetime] = None
@@ -125,6 +134,7 @@ class ModelCreate(BaseModel):
     embedding_dimensions: Optional[int] = None
     supports_vision: bool = False
     supports_tool_calling: bool = False
+    supports_multimodal_embedding: bool = False
     extra: dict[str, Any] = Field(default_factory=dict)
 
 
@@ -139,6 +149,7 @@ class ModelUpdate(BaseModel):
     embedding_dimensions: Optional[int] = None
     supports_vision: Optional[bool] = None
     supports_tool_calling: Optional[bool] = None
+    supports_multimodal_embedding: Optional[bool] = None
     status: Optional[Literal["ACTIVE", "INACTIVE"]] = None
     extra: Optional[dict[str, Any]] = None
 
diff --git a/aperag/domains/model_platform/service/model_service.py b/aperag/domains/model_platform/service/model_service.py
index 89d55bf9e..036ddd5ac 100644
--- a/aperag/domains/model_platform/service/model_service.py
+++ b/aperag/domains/model_platform/service/model_service.py
@@ -103,6 +103,7 @@ def _model_to_schema(model) -> Model:
         embedding_dimensions=model.embedding_dimensions,
         supports_vision=model.supports_vision,
         supports_tool_calling=model.supports_tool_calling,
+        supports_multimodal_embedding=getattr(model, "supports_multimodal_embedding", False) or False,
         status=model.status,
         extra=model.extra or {},
         created=model.gmt_created,
diff --git a/aperag/llm/embed/base_embedding.py b/aperag/llm/embed/base_embedding.py
index 72ae5efdd..644ee9382 100644
--- a/aperag/llm/embed/base_embedding.py
+++ b/aperag/llm/embed/base_embedding.py
@@ -50,13 +50,22 @@ def get_embedding_service(model_id: str, user_id: str) -> tuple[EmbeddingService
     provider = invocation.runner_config.get("provider")
     if not provider:
         provider = "openai" if invocation.runner_type == "openai_compatible" else invocation.provider_type
+    # Wave 5 P2 chunk 3 (per §G.2.5.1 spec amend item 3): prefer the
+    # typed ``Model.supports_multimodal_embedding`` column when present,
+    # fall back to the legacy ``runner_config["multimodal"]`` JSON dict
+    # entry so collections / models created before the typed column
+    # landed keep working (operators who edited the JSON keep the same
+    # behaviour without re-saving the model row).
+    multimodal = bool(getattr(model, "supports_multimodal_embedding", False)) or bool(
+        invocation.runner_config.get("multimodal", False)
+    )
     embedding_svc = EmbeddingService(
         embedding_provider=provider,
         embedding_model=invocation.provider_model_id,
         embedding_service_url=invocation.base_url,
         embedding_service_api_key=invocation.api_key,
         embedding_max_chunks_in_batch=settings.embedding_max_chunks_in_batch,
-        multimodal=invocation.runner_config.get("multimodal", False),
+        multimodal=multimodal,
     )
     return embedding_svc, _get_embedding_dimension(embedding_svc, model_id, invocation.embedding_dimensions)
 
diff --git a/aperag/migration/versions/20260427030000-f1c8d2a5b6e3_model_supports_multimodal_embedding.py b/aperag/migration/versions/20260427030000-f1c8d2a5b6e3_model_supports_multimodal_embedding.py
new file mode 100644
index 000000000..c71773310
--- /dev/null
+++ b/aperag/migration/versions/20260427030000-f1c8d2a5b6e3_model_supports_multimodal_embedding.py
@@ -0,0 +1,41 @@
+"""add ``supports_multimodal_embedding`` column to ``model``
+
+Wave 5 P2 chunk 3 (per §G.2.5.1 spec amend item 3): a typed
+capability flag on ``Model`` for embedding models that accept image
+bytes (CLIP / Voyage Multimodal / Jina v3 / OpenAI multimodal
+embeddings / etc.) and produce a single vector. Distinct from
+``supports_vision`` which describes chat/completion models that
+accept images as input. Drives the chunk 4b vision gate's
+``EmbeddingService.is_multimodal()`` runtime check — flip on the
+collection's embedder spec model and the gate self-disables.
+
+Revision ID: f1c8d2a5b6e3
+Revises: e7a3b9c2d1f6
+Create Date: 2026-04-27 03:00:00.000000
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+revision: str = "f1c8d2a5b6e3"
+down_revision: Union[str, None] = "e7a3b9c2d1f6"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "model",
+        sa.Column(
+            "supports_multimodal_embedding",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.text("false"),
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("model", "supports_multimodal_embedding")
diff --git a/tests/unit_test/test_model_platform_v3_contract.py b/tests/unit_test/test_model_platform_v3_contract.py
index ff1e906f2..294481e5d 100644
--- a/tests/unit_test/test_model_platform_v3_contract.py
+++ b/tests/unit_test/test_model_platform_v3_contract.py
@@ -76,3 +76,46 @@ def test_model_provider_and_model_use_are_explicit():
 
     assert provider.supported_capabilities == [ModelCapability.CHAT, ModelCapability.EMBEDDING]
     assert ModelUseScenario.AGENT_CHAT.value == "agent_chat"
+
+
+def test_model_create_supports_multimodal_embedding_flag_defaults_false():
+    """Wave 5 P2 chunk 3 (per §G.2.5.1 spec amend item 3): the
+    ``supports_multimodal_embedding`` capability flag defaults to
+    False so existing model rows / pre-Wave-5 callers see no
+    behaviour change."""
+    model = ModelCreate(
+        account_id="acct_1",
+        provider_model_id="text-embedding-3-large",
+        display_name="OpenAI text-embedding-3-large",
+        capability=ModelCapability.EMBEDDING,
+    )
+    assert model.supports_multimodal_embedding is False
+
+
+def test_model_create_supports_multimodal_embedding_flag_can_be_set():
+    """Operators register a real multimodal embedder (Voyage Multimodal /
+    CLIP / Jina v3 / etc.) by setting this flag — it surfaces through
+    the v3 ``/models`` route so a UI can render a checkbox."""
+    model = ModelCreate(
+        account_id="acct_1",
+        provider_model_id="voyage-multimodal-3",
+        display_name="Voyage Multimodal 3",
+        capability=ModelCapability.EMBEDDING,
+        supports_multimodal_embedding=True,
+    )
+    assert model.supports_multimodal_embedding is True
+
+
+def test_model_v3_openapi_exposes_supports_multimodal_embedding():
+    """The v3 routes' OpenAPI schema must surface the new capability
+    flag so the API contract stays discoverable for UI clients."""
+    spec = _provider_v3_spec()
+    schemas = spec["components"]["schemas"]
+    # All three model schemas (read / create / update) must carry
+    # the field so a UI can read + modify it consistently.
+    for schema_name in ("Model", "ModelCreate", "ModelUpdate"):
+        assert schema_name in schemas, f"{schema_name} schema missing from v3 OpenAPI"
+        properties = schemas[schema_name].get("properties", {})
+        assert "supports_multimodal_embedding" in properties, (
+            f"{schema_name} missing supports_multimodal_embedding capability flag"
+        )

From 8e7c6ecd21f466c949a2cff0444cff4ccb97e42f Mon Sep 17 00:00:00 2001
From: earayu <earayu@163.com>
Date: Mon, 27 Apr 2026 19:00:16 +0800
Subject: [PATCH 13/13] feat(celery Wave 5 P2 chunk 4): vision callsite rewrite
 + chunk 4b gate self-disable verify
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per §G.2.5.1 spec amend final piece: rewire `_build_vision_worker._embed`
to call `EmbeddingService.embed_image(image_bytes, alt_text)` (chunk 1)
with the actual image bytes the parser persisted (chunk 2 wrote
`derived/parse_<v>/vision/images/<image_id>.<ext>` + a JSONL descriptor
at `vision/source.jsonl`). The chunk 4b vision gate self-disables when
the operator flips `Model.supports_multimodal_embedding=True` (chunk 3).

`aperag/indexing/worker_factory.py`:
* `_embed(image_id, alt_text, image_bytes=None)` — when image_bytes
  is provided, route to `embedding_service.embed_image(image_bytes,
  alt_text)`. None falls back to the legacy text-concat path so the
  T1 simulator + tests that hand the worker synthetic JSON keep
  working.
* Gate-raise message reframed: drops "Wave 4 wiring" phrasing (now
  Wave 5 wiring is land), names the typed
  `Model.supports_multimodal_embedding` flag so an operator can fix
  the config directly.

`aperag/indexing/vision.py`:
* `VisionModality.derive` accepts both source-path formats: the
  legacy single-JSON-array shape (T1 simulator / pre-Wave-5 tests)
  AND the new JSONL-with-image-path shape (parser chunk 2 output).
  Format detection is by first non-whitespace byte (`[` → JSON array,
  else → JSONL).
* `_load_image_bytes(record)` reads the descriptor's `image_path`
  through the object store; missing blob logs a warning and returns
  None (embedder still runs on the alt-text/id placeholder digest)
  so a partial parser write doesn't block the whole derive cycle.
* `_placeholder_embedding(..., image_bytes=None)` mirrors the new
  embedder signature; placeholder ignores bytes.
* Embedder Protocol is widened: `(image_id, alt_text, image_bytes=None)`.

`tests/integration/test_full_indexing_pipeline.py`:
* Renamed Layer 1 `test_phase1_vision_modality_raises_wave4_wiring_gate`
  → `..._gate_raises_when_embedder_not_multimodal`. Asserts the
  reframed message names `multimodal embedding model` +
  `supports_multimodal_embedding` flag.
* New positive-path Layer 1
  `test_phase1_vision_modality_gate_self_disables_when_embedder_multimodal`:
  with `is_multimodal()=True`, the factory builds a vision worker
  without raising — pins the chunk 4 gate-self-disable contract.
* Layer 2 e2e assertion: vision modality may be ACTIVE (when CI
  fixture has multimodal embedder configured) OR FAILED with a gate
  marker including `supports_multimodal_embedding`. OR-on-marker
  tolerance kept for transition state.

`tests/unit_test/indexing/test_t1_4_summary_vision.py`: 3 new tests
covering JSONL descriptor with image_path (bytes loaded + forwarded),
missing-blob graceful fallback, and legacy JSON-array backward compat.

Production-readiness 三类:
- must-be-real: real `embed_image` callsite + real bytes load from
  parser's descriptor
- may-be-gated: legacy text-concat fallback + simulator JSON format
  preserved for tests
- fully-resolves: §G.2.5.1 spec items 1+2+3 all wired end-to-end +
  chunk 4b vision gate self-disable verify (Wave 5 P2 closure)

Wave 5 P2 (T7 multimodal vision-LLM 3-item bundle) closed. The chunk
4b vision gate self-disables when an operator configures a multimodal
embedder; default-off behaviour preserved for text-only collections.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 aperag/indexing/vision.py                     | 100 +++++++++++--
 aperag/indexing/worker_factory.py             |  29 ++--
 .../test_full_indexing_pipeline.py            | 121 +++++++++++++---
 .../indexing/test_t1_4_summary_vision.py      | 131 ++++++++++++++++++
 4 files changed, 334 insertions(+), 47 deletions(-)

diff --git a/aperag/indexing/vision.py b/aperag/indexing/vision.py
index 3d17598ca..783339a4c 100644
--- a/aperag/indexing/vision.py
+++ b/aperag/indexing/vision.py
@@ -112,7 +112,21 @@ def points_for_document(self, document_id: str, parse_version: str | None = None
         return sorted(out, key=lambda r: r["point_id"])
 
 
-def _placeholder_embedding(image_id: str, alt_text: str, dim: int = SIMULATOR_VISION_EMBEDDING_DIM) -> list[float]:
+def _placeholder_embedding(
+    image_id: str,
+    alt_text: str,
+    image_bytes: bytes | None = None,
+    dim: int = SIMULATOR_VISION_EMBEDDING_DIM,
+) -> list[float]:
+    """Deterministic synthetic embedding for tests + the simulator.
+
+    Wave 5 P2 chunk 4: the optional ``image_bytes`` parameter mirrors
+    the production embedder signature (real multimodal embedders consume
+    bytes). The placeholder ignores it — the digest is over
+    ``image_id|alt_text`` so re-running the simulator produces the same
+    vector for the same record.
+    """
+    del image_bytes  # placeholder ignores actual bytes
     digest = hashlib.sha256(f"{image_id}|{alt_text}".encode("utf-8")).digest()
     repeat = (dim + len(digest) - 1) // len(digest)
     expanded = (digest * repeat)[:dim]
@@ -144,11 +158,21 @@ async def derive(
     ) -> DeriveResult:
         """Extract images, run vision-LLM, persist manifest atomically.
 
-        T1 simulator contract: ``source_path`` is a JSON file in the
-        object store containing ``[{"image_id": ..., "alt_text": ...,
-        "page_idx": int|None, "bbox": [...]|None}, ...]``. The real
-        T2.x pipeline replaces this with PDF extraction; the
-        ``manifest.jsonl`` schema is the contract that must not change.
+        Two source-path formats are supported (the production parser
+        wrote the second one starting Wave 5 P2 chunk 2):
+
+        * **Legacy simulator** (single JSON array): a JSON list at
+          ``source_path`` holding ``[{"image_id": ..., "alt_text": ...,
+          "page_idx": int|None, "bbox": [...]|None}, ...]``. No image
+          bytes — the embedder gets ``image_bytes=None`` and falls back
+          to the alt-text/id placeholder digest.
+        * **Wave 5 production descriptor** (JSONL with ``image_path``):
+          one record per line at ``derived/parse_<v>/vision/source.jsonl``
+          with ``{"image_id", "image_path", "mime_type", "alt_text",
+          "page_idx", "bbox"}``. The worker loads each image's bytes
+          from ``image_path`` and hands them to the embedder so a real
+          multimodal embedding model can produce a real visual vector
+          (Wave 5 P2 chunk 4 callsite rewrite).
         """
         body = read_or_none(self._store, source_path)
         if body is None:
@@ -158,12 +182,7 @@ async def derive(
             )
             return DeriveResult(derived_artifact_path="")
 
-        try:
-            image_records = json.loads(body)
-        except json.JSONDecodeError as exc:
-            raise ValueError(f"vision.derive expected JSON list at {source_path}, got non-JSON: {exc}") from exc
-        if not isinstance(image_records, list):
-            raise ValueError(f"vision.derive expected JSON list of image records, got {type(image_records).__name__}")
+        image_records = self._parse_descriptor(body, source_path=source_path)
 
         parts = source_path.split("/")
         try:
@@ -179,7 +198,8 @@ async def derive(
         for record in image_records:
             image_id = record["image_id"]
             alt_text = record.get("alt_text", "")
-            embedding = self._embedder(image_id, alt_text)
+            image_bytes = self._load_image_bytes(record)
+            embedding = self._embedder(image_id, alt_text, image_bytes=image_bytes)
             entry = {
                 "image_id": image_id,
                 "alt_text": alt_text,
@@ -251,6 +271,60 @@ async def sync(
                 payload=payload,
             )
 
+    def _parse_descriptor(self, body: bytes, *, source_path: str) -> list[dict]:
+        """Decode the source-image descriptor file.
+
+        Tolerates both the legacy single-JSON-array shape and the
+        Wave 5 P2 chunk 2 JSONL-with-image-path shape (parser writes
+        the latter). The first byte of body picks the format: ``[``
+        means a JSON array, anything else is JSONL.
+        """
+        text = body.decode("utf-8")
+        stripped = text.lstrip()
+        if stripped.startswith("["):
+            try:
+                records = json.loads(text)
+            except json.JSONDecodeError as exc:
+                raise ValueError(f"vision.derive expected JSON list at {source_path}, got non-JSON: {exc}") from exc
+            if not isinstance(records, list):
+                raise ValueError(f"vision.derive expected JSON list of image records, got {type(records).__name__}")
+            return [record for record in records if isinstance(record, dict)]
+        records: list[dict] = []
+        for line in text.splitlines():
+            if not line.strip():
+                continue
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError as exc:
+                raise ValueError(f"vision.derive expected JSONL at {source_path}, malformed line: {exc}") from exc
+            if not isinstance(record, dict):
+                raise ValueError(f"vision.derive expected JSONL records to be objects, got {type(record).__name__}")
+            records.append(record)
+        return records
+
+    def _load_image_bytes(self, record: dict) -> bytes | None:
+        """Load image bytes from the descriptor's ``image_path`` if
+        present.
+
+        Returns ``None`` for legacy simulator records (no ``image_path``
+        field) so the embedder falls back to its non-bytes path. A
+        missing-blob error logs but doesn't raise — the embedder still
+        runs with ``image_bytes=None`` so a partial parser write doesn't
+        block the whole derive cycle.
+        """
+        image_path = record.get("image_path")
+        if not image_path:
+            return None
+        body = read_or_none(self._store, str(image_path))
+        if body is None:
+            logger.warning(
+                "vision.derive: image_path %s referenced in descriptor but blob missing; "
+                "embedder falls back to text-only path",
+                image_path,
+            )
+            return None
+        return body
+
 
 __all__ = [
     "VisionModality",
diff --git a/aperag/indexing/worker_factory.py b/aperag/indexing/worker_factory.py
index c1f76f54d..b7378f44c 100644
--- a/aperag/indexing/worker_factory.py
+++ b/aperag/indexing/worker_factory.py
@@ -442,23 +442,28 @@ def _build_vision_worker(*, collection: Any, object_store: Any) -> ModalityWorke
     embedding_service, vector_size = get_collection_embedding_service_sync(collection)
     if not embedding_service.is_multimodal():
         raise WorkerFactoryError(
-            "vision modality requires a real multimodal vision-LLM (Wave 4 wiring); "
-            "current text-only embedder produces fake string-concat vision vectors — "
-            "set collection.config.enable_vision=false until Wave 4 lands "
-            "OR configure a multimodal embedding model on the collection's embedding spec"
+            "vision modality requires a multimodal embedding model "
+            "(set Model.supports_multimodal_embedding=True on the collection's "
+            "embedder spec — Voyage Multimodal / Jina v3 / OpenAI multimodal / etc.) "
+            "OR set collection.config.enable_vision=false to keep the modality off"
         )
     qdrant_collection = generate_vector_db_collection_name(collection.id)
     adaptor = get_vector_db_connector(qdrant_collection, vector_size=vector_size)
     backend = _QdrantPointBackend(connector=adaptor.connector)
 
-    def _embed(image_id: str, alt_text: str) -> list[float]:
-        # ``is_multimodal()`` gate above only verifies that operators
-        # explicitly opted into a multimodal embedder. The body is
-        # still the Wave 3 string-concat placeholder until T7 replaces
-        # it with a real image-bytes path (load image from object
-        # store → multimodal embed); ``embed_query`` of an alt-text
-        # surrogate is not actual visual indexing.
-        return embedding_service.embed_query(f"{image_id}|{alt_text}")
+    def _embed(image_id: str, alt_text: str, image_bytes: bytes | None = None) -> list[float]:
+        # Wave 5 P2 chunk 4: real callsite rewrite — load the actual
+        # image bytes from the parser's descriptor (chunk 2 writes
+        # ``vision/source.jsonl`` with ``image_path``) and call the
+        # multimodal-aware ``embed_image`` API surface (chunk 1).
+        # ``image_bytes=None`` only happens on the legacy simulator
+        # path used by tests — fall back to the text-only embedding so
+        # those fixtures keep working without standing up real image
+        # bytes. The chunk 4b gate above already prevents a non-multi-
+        # modal embedder from reaching this body.
+        if image_bytes is None:
+            return embedding_service.embed_query(f"{image_id}|{alt_text}")
+        return embedding_service.embed_image(image_bytes=image_bytes, alt_text=alt_text)
 
     return VisionModality(backend=backend, store=object_store, embedder=_embed)
 
diff --git a/tests/integration/test_full_indexing_pipeline.py b/tests/integration/test_full_indexing_pipeline.py
index 6651d22a4..d95db1343 100644
--- a/tests/integration/test_full_indexing_pipeline.py
+++ b/tests/integration/test_full_indexing_pipeline.py
@@ -211,18 +211,19 @@ async def _run() -> None:
         engine.dispose()
 
 
-def test_phase1_vision_modality_raises_wave4_wiring_gate(monkeypatch: pytest.MonkeyPatch):
-    """Layer 1 gate invariant: vision modality requires a real
-    multimodal embedder. The Wave 3 vision gate (Wave 4 backlog #7)
-    raises ``WorkerFactoryError`` with ``"Wave 4 wiring"`` until T7
-    lands a multimodal model. Phase 1 smoke pins this — Phase 2 (after
-    T7) flips it to ACTIVE assertion.
+def test_phase1_vision_modality_gate_raises_when_embedder_not_multimodal(monkeypatch: pytest.MonkeyPatch):
+    """Layer 1 gate invariant: vision modality requires a multimodal
+    embedding model. When ``EmbeddingService.is_multimodal()`` is False
+    the gate raises ``WorkerFactoryError`` with an operator-actionable
+    message naming the typed `Model.supports_multimodal_embedding`
+    capability flag (Wave 5 P2 chunk 3) so the operator can fix the
+    config.
+
+    Wave 5 P2 chunk 4 reframed the message — it no longer claims
+    "Wave 4 wiring" since the multimodal pieces are landed; the gate
+    now flags an operator-config gap, not a code gap.
     """
 
-    # Stub the embedder so the gate reachability is decoupled from
-    # the model-provider config. The gate compares
-    # ``embedding_service.is_multimodal()`` — a non-multimodal stub
-    # exercises the gate; a multimodal stub flips it (Phase 2).
     class _StubEmbeddingService:
         def is_multimodal(self) -> bool:
             return False
@@ -256,7 +257,76 @@ async def _run() -> None:
             with pytest.raises(WorkerFactoryError) as exc:
                 await factory(payload)
             msg = str(exc.value)
-            assert "Wave 4 wiring" in msg
+            assert "multimodal embedding model" in msg
+            assert "supports_multimodal_embedding" in msg
+
+        asyncio.run(_run())
+    finally:
+        engine.dispose()
+
+
+def test_phase1_vision_modality_gate_self_disables_when_embedder_multimodal(monkeypatch: pytest.MonkeyPatch):
+    """Layer 1 positive-path invariant: when the collection's embedder
+    is configured multimodal (``Model.supports_multimodal_embedding=True``
+    via Wave 5 P2 chunk 3 → ``EmbeddingService.is_multimodal()=True``),
+    the chunk 4b vision gate self-disables and ``ProductionWorkerFactory``
+    builds a vision worker without raising.
+
+    Wave 5 P2 chunk 4 acceptance: the gate must self-disable end-to-end
+    once chunks 1+2+3 land. Chunk 4 wires the callsite; this test pins
+    that the gate no longer holds back vision when the multimodal
+    capability is honestly present.
+    """
+
+    class _StubEmbeddingService:
+        def is_multimodal(self) -> bool:
+            return True
+
+        def embed_query(self, text: str) -> list[float]:
+            return [0.0]
+
+        def embed_image(self, *, image_bytes: bytes, alt_text: str = "") -> list[float]:
+            return [0.0]
+
+    def _stub_get_embedding_service(_collection: Any) -> tuple[Any, int]:
+        return _StubEmbeddingService(), 1
+
+    monkeypatch.setattr(
+        "aperag.llm.embed.base_embedding.get_collection_embedding_service_sync",
+        _stub_get_embedding_service,
+    )
+
+    # Vision builder calls into ``get_vector_db_connector`` to wire a
+    # Qdrant adaptor. Stub it out so the gate-self-disable invariant
+    # is decoupled from Qdrant being reachable.
+    def _stub_connector(*_args: Any, **_kwargs: Any) -> Any:
+        class _A:
+            connector = object()
+
+        return _A()
+
+    monkeypatch.setattr(
+        "aperag.config.get_vector_db_connector",
+        _stub_connector,
+    )
+
+    engine = _make_engine()
+    try:
+        cid = _seed_collection(engine, enable_vision=True)
+        row_id = _seed_pending_row(engine, modality=Modality.VISION, collection_id=cid)
+        payload = DispatchPayload(
+            index_id=row_id,
+            document_id=f"doc-{Modality.VISION.value}-phase1-active",
+            parse_version="parse-v1",
+            modality=Modality.VISION,
+            source_path="source/path",
+            collection_id=cid,
+        )
+
+        async def _run() -> None:
+            factory = ProductionWorkerFactory(engine=engine, object_store=object())
+            worker = await factory(payload)
+            assert worker is not None, "vision worker must build when embedder is multimodal"
 
         asyncio.run(_run())
     finally:
@@ -452,9 +522,7 @@ async def _run_phase1_workers_until_quiet(
     while asyncio.get_event_loop().time() < deadline:
         with Session(engine) as session:
             rows = list(
-                session.execute(
-                    sa_select(DocumentIndex).where(DocumentIndex.document_id == document_id)
-                ).scalars()
+                session.execute(sa_select(DocumentIndex).where(DocumentIndex.document_id == document_id)).scalars()
             )
         if not rows:
             await asyncio.sleep(0.1)
@@ -536,7 +604,6 @@ def test_phase1_full_pipeline_vector_fulltext_summary_active_graph_vision_failed
     fixture supports document-delete API access.
     """
 
-
     from aperag.indexing.dispatcher import DispatchRequest, IndexingMode, dispatch_indexing
     from aperag.indexing.parser import ParseConfig, parse_document
     from aperag.objectstore.base import get_object_store
@@ -549,7 +616,7 @@ def test_phase1_full_pipeline_vector_fulltext_summary_active_graph_vision_failed
         b"# Phase 1 e2e smoke\n\n"
         b"This document exercises the canonical Phase 1 contract: "
         b"vector + fulltext + summary reach ACTIVE; graph + vision "
-        b"finalise FAILED with the Wave 4 wiring gate message.\n"
+        b"finalise per the collection's gate state.\n"
     )
 
     async def _run() -> None:
@@ -605,16 +672,28 @@ async def _run() -> None:
                 )
                 assert row.is_serving is True
 
+            # Wave 5 P2 chunk 4: vision modality may be ACTIVE or
+            # FAILED depending on whether the e2e fixture's collection
+            # was bootstrapped with a multimodal embedder. Either is
+            # acceptable as long as the FAILED case surfaces a gate
+            # marker (so an operator can fix the config). Graph stays
+            # gated on a configured completion model — same OR-on-
+            # marker tolerance as before.
             for modality in (Modality.GRAPH, Modality.VISION):
                 row = finalised[modality]
+                if modality is Modality.VISION and row.status == IndexStatus.ACTIVE.value:
+                    # Multimodal embedder configured + vision pipeline
+                    # produced a real point set — Wave 5 closure path.
+                    assert row.is_serving is True
+                    continue
                 assert row.status == IndexStatus.FAILED.value, (
-                    f"modality={modality.value} must finalise FAILED until Wave 5 T7 lands; "
-                    f"actual={row.status}"
+                    f"modality={modality.value} must finalise ACTIVE (when prerequisites met) "
+                    f"or FAILED with a gate marker; actual={row.status}"
                 )
                 msg = row.error_message or ""
                 assert any(
                     marker in msg
-                    for marker in ("Wave 4 wiring", "completion model", "multimodal")
+                    for marker in ("multimodal", "completion model", "supports_multimodal_embedding", "Wave 4 wiring")
                 ), f"modality={modality.value} FAILED message must surface a gate marker; got {msg!r}"
         finally:
             engine.dispose()
@@ -657,9 +736,7 @@ async def _run() -> None:
             from aperag.indexing.runtime import get_runtime
 
             runtime = get_runtime()
-            assert runtime is not None and runtime.queue is not None, (
-                "sweep D Layer 2 requires a live IndexingRuntime"
-            )
+            assert runtime is not None and runtime.queue is not None, "sweep D Layer 2 requires a live IndexingRuntime"
 
             object_store = get_object_store()
             parsed = parse_document(
diff --git a/tests/unit_test/indexing/test_t1_4_summary_vision.py b/tests/unit_test/indexing/test_t1_4_summary_vision.py
index 8d7d9bb66..dae76d7ea 100644
--- a/tests/unit_test/indexing/test_t1_4_summary_vision.py
+++ b/tests/unit_test/indexing/test_t1_4_summary_vision.py
@@ -244,6 +244,137 @@ def test_vision_modality_enum_is_vision():
     assert VisionModality.modality is Modality.VISION
 
 
+def test_vision_derive_consumes_jsonl_descriptor_with_image_path():
+    """Wave 5 P2 chunk 4: when the parser writes the new descriptor
+    (`vision/source.jsonl`) with an ``image_path`` per record, vision
+    derive must (a) load each image's bytes from the path, (b) hand
+    them to the embedder via ``image_bytes=``, (c) still emit one
+    manifest line per record."""
+
+    store = InMemoryObjectStore()
+
+    # Stage two image blobs at canonical paths.
+    image_a_path = "collections/col-1/documents/doc-vision-jsonl/derived/parse_v1/vision/images/img-a.jpg"
+    image_b_path = "collections/col-1/documents/doc-vision-jsonl/derived/parse_v1/vision/images/img-b.png"
+    write_atomic(store, image_a_path, b"\xff\xd8\xff\xe0fake-jpeg")
+    write_atomic(store, image_b_path, b"\x89PNGfake-png")
+
+    # Stage the JSONL descriptor pointing at the blobs.
+    descriptor_path = "collections/col-1/documents/doc-vision-jsonl/derived/parse_v1/vision/source.jsonl"
+    descriptor_lines = [
+        json.dumps(
+            {
+                "image_id": "img-a",
+                "image_path": image_a_path,
+                "mime_type": "image/jpeg",
+                "alt_text": "banner",
+                "page_idx": 0,
+                "bbox": [0, 0, 100, 100],
+            }
+        ),
+        json.dumps(
+            {
+                "image_id": "img-b",
+                "image_path": image_b_path,
+                "mime_type": "image/png",
+                "alt_text": "",
+                "page_idx": None,
+                "bbox": None,
+            }
+        ),
+    ]
+    write_atomic(store, descriptor_path, ("\n".join(descriptor_lines) + "\n").encode("utf-8"))
+
+    # Capture what the embedder sees so we can assert image bytes were
+    # actually loaded and forwarded.
+    seen: list[tuple[str, str, bytes | None]] = []
+
+    def _capturing_embedder(image_id: str, alt_text: str, image_bytes: bytes | None = None) -> list[float]:
+        seen.append((image_id, alt_text, image_bytes))
+        return [0.1, 0.2, 0.3]
+
+    modality = VisionModality(backend=InMemoryVisionBackend(), store=store, embedder=_capturing_embedder)
+    result = asyncio.run(
+        modality.derive(
+            document_id="doc-vision-jsonl",
+            parse_version="v1",
+            source_path=descriptor_path,
+        )
+    )
+
+    assert result.derived_artifact_path.endswith("vision/manifest.jsonl")
+    assert len(seen) == 2
+    a_seen = next(rec for rec in seen if rec[0] == "img-a")
+    b_seen = next(rec for rec in seen if rec[0] == "img-b")
+    assert a_seen[2] == b"\xff\xd8\xff\xe0fake-jpeg"
+    assert b_seen[2] == b"\x89PNGfake-png"
+
+
+def test_vision_derive_falls_back_when_image_blob_missing():
+    """If the descriptor references an ``image_path`` that does not
+    exist (parser write was interrupted / object store eviction), the
+    embedder still runs with ``image_bytes=None`` so the partial-derive
+    state surfaces a manifest the operator can inspect rather than the
+    whole derive cycle exploding."""
+
+    store = InMemoryObjectStore()
+    descriptor_path = "collections/col-1/documents/doc-broken/derived/parse_v1/vision/source.jsonl"
+    record = {
+        "image_id": "img-missing",
+        "image_path": "collections/col-1/documents/doc-broken/derived/parse_v1/vision/images/img-missing.jpg",
+        "mime_type": "image/jpeg",
+        "alt_text": "stale",
+        "page_idx": None,
+        "bbox": None,
+    }
+    write_atomic(store, descriptor_path, (json.dumps(record) + "\n").encode("utf-8"))
+
+    seen: list[bytes | None] = []
+
+    def _capturing_embedder(image_id: str, alt_text: str, image_bytes: bytes | None = None) -> list[float]:
+        seen.append(image_bytes)
+        return [0.0]
+
+    modality = VisionModality(backend=InMemoryVisionBackend(), store=store, embedder=_capturing_embedder)
+    asyncio.run(
+        modality.derive(
+            document_id="doc-broken",
+            parse_version="v1",
+            source_path=descriptor_path,
+        )
+    )
+
+    assert seen == [None], "missing image blob → embedder receives image_bytes=None fallback"
+
+
+def test_vision_derive_legacy_simulator_format_still_works():
+    """Backward-compat: a legacy single-JSON-array source file (the
+    pre-Wave-5 simulator shape) should keep working. ``image_bytes``
+    is None for every record (no ``image_path`` field) and the
+    placeholder embedder produces a deterministic vector."""
+
+    store = InMemoryObjectStore()
+    images = [
+        {"image_id": "img-1", "alt_text": "x", "page_idx": 0, "bbox": None},
+        {"image_id": "img-2", "alt_text": "y", "page_idx": 1, "bbox": None},
+    ]
+    source_path = _seed_vision_source(store, document_id="doc-legacy", payload=images)
+    modality = VisionModality(backend=InMemoryVisionBackend(), store=store)
+
+    result = asyncio.run(
+        modality.derive(
+            document_id="doc-legacy",
+            parse_version="v1",
+            source_path=source_path,
+        )
+    )
+    body = store.get(result.derived_artifact_path).read().decode("utf-8")
+    lines = [json.loads(line) for line in body.splitlines() if line.strip()]
+    assert {entry["image_id"] for entry in lines} == {"img-1", "img-2"}
+    for entry in lines:
+        assert entry["embedding"]
+
+
 def test_vision_payload_carries_modality_discriminator_and_image_id():
     store = InMemoryObjectStore()
     images = [{"image_id": "img-only", "alt_text": "only", "page_idx": None, "bbox": None}]