ql-link
diff --git a/‎.env.example‎
Lines changed: 3 additions & 0 deletions b/‎.env.example‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/internals/chunking.md‎
Lines changed: 15 additions & 3 deletions b/‎docs/internals/chunking.md‎
Lines changed: 15 additions & 3 deletions
diff --git a/‎docs/ops/configure.md‎
Lines changed: 2 additions & 1 deletion b/‎docs/ops/configure.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/config.py‎
Lines changed: 8 additions & 0 deletions b/‎src/config.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/core/splitter/__init__.py‎
Lines changed: 7 additions & 4 deletions b/‎src/core/splitter/__init__.py‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎src/core/splitter/factory.py‎
Lines changed: 1 addition & 0 deletions b/‎src/core/splitter/factory.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/core/splitter/overlap.py‎
Lines changed: 136 additions & 0 deletions b/‎src/core/splitter/overlap.py‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎src/core/splitter/pipeline_chunker.py‎
Lines changed: 7 additions & 26 deletions b/‎src/core/splitter/pipeline_chunker.py‎
Lines changed: 7 additions & 26 deletions
@@ -53,6 +53,9 @@ CHUNKING_SEMANTIC_PERCENTILE=95
 CHUNKING_SEMANTIC_UNIT=sentence
 CHUNKING_MIN_CHUNK_TOKENS=150
 CHUNKING_MAX_CHUNK_TOKENS=512
+# 是否启用相邻 chunk overlap；关闭后 CHUNKING_OVERLAP_TOKENS 不生效
+CHUNKING_OVERLAP_ENABLED=true
+# overlap token 数允许范围：0-64
 CHUNKING_OVERLAP_TOKENS=64
 CHUNKING_MIN_DISTANCE_GATE=0.25
 CHUNKING_EMBED_BATCH_SIZE=32
 
@@ -11,6 +11,7 @@ src/core/splitter/
 ├── chunking_engine.py      # Markdown 解析与分片编排入口
 ├── rule_chunker.py         # 基于 Markdown AST 的规则分片
 ├── semantic_chunker.py     # 基于 embedding 距离的语义细分
+├── overlap.py              # chunk overlap 配置与上下文拼接
 ├── pipeline_chunker.py     # 结构分片 + 语义细分两阶段分片器
 └── embedding_pipeline.py   # Chunk 向量化批处理管线
 ```
@@ -88,13 +89,24 @@ class BaseChunker(ABC):
 - 先按 `semantic_unit` 配置把文本拆成语义比较原子；默认 `sentence` 保持原有段落、行、句子逐级降级行为，`paragraph` 则以段落作为相似度计算单位。
 - 调用 embedding 模型计算相邻原子的语义距离。
 - 使用距离分位数作为动态阈值寻找断点。
-- 受 `min_chunk_tokens`、`max_chunk_tokens`、`overlap_tokens` 控制。
+- 受 `min_chunk_tokens`、`max_chunk_tokens` 控制；overlap 由独立配置控制，但仍在原切分位置追加，保证算法流程不变。
 
 `paragraph` 模式只改变相似度计算粒度：单个段落超过 `max_chunk_tokens` 时，不会再改用句子级 embedding 计算断点，但最终输出仍会做长度保底拆分，避免生成超长 Chunk。
 
 它通常不直接作为主分片器使用，而是被 `StructuredSemanticChunker` 注入。
 
-### 3.3 StructuredSemanticChunker
+### 3.3 ChunkOverlapper
+
+`ChunkOverlapper` 负责相邻 Chunk 的上下文 overlap，不参与语义断点计算。
+
+配置：
+
+- `CHUNKING_OVERLAP_ENABLED`：是否启用 overlap。
+- `CHUNKING_OVERLAP_TOKENS`：启用后追加的 token 数上限，范围 `0..64`。
+
+`CHUNKING_OVERLAP_ENABLED=false` 或 `CHUNKING_OVERLAP_TOKENS=0` 时，不追加 overlap。默认配置（`CHUNKING_OVERLAP_ENABLED=true`、`CHUNKING_OVERLAP_TOKENS=64`）保持现有分片行为。
+
+### 3.4 StructuredSemanticChunker
 
 `StructuredSemanticChunker` 是两阶段分片器：
 
@@ -201,7 +213,7 @@ chunks = engine.process(markdown)
 修改语义分片时关注：
 
 - token 上下限是否合理。
-- overlap 是否造成内容膨胀。
+- overlap 是否按 `CHUNKING_OVERLAP_ENABLED` 与 `CHUNKING_OVERLAP_TOKENS` 生效，且没有造成内容膨胀。
 - embedding 调用是否批量且可测试。
 - 语义断点失败时是否有 fallback。
 
 
@@ -86,7 +86,8 @@
 | --- | --- | --- |
 | `CHUNKING_MIN_CHUNK_TOKENS` | 150 | 短文档可减小 |
 | `CHUNKING_MAX_CHUNK_TOKENS` | 512 | 长上下文模型可加大 |
-| `CHUNKING_OVERLAP_TOKENS` | 64 | 提升召回时加大 |
+| `CHUNKING_OVERLAP_ENABLED` | `true` | 是否启用相邻 chunk overlap |
+| `CHUNKING_OVERLAP_TOKENS` | 64 | overlap token 数，范围 `0..64`；仅在 `CHUNKING_OVERLAP_ENABLED=true` 时生效 |
 | `CHUNKING_HEADING_BREAK_LEVEL` | 3 | 提升结构敏感性时减小 |
 | `CHUNKING_SEMANTIC_PERCENTILE` | 95 | 调整语义边界严格度 |
 | `CHUNKING_SEMANTIC_UNIT` | `sentence` | 语义相似度计算粒度：`sentence` / `paragraph` |
 
@@ -112,6 +112,7 @@ def assemble_redis_url(cls, v: Optional[str], info) -> str:
     CHUNKING_SEMANTIC_UNIT: str = "sentence"
     CHUNKING_MIN_CHUNK_TOKENS: int = 150
     CHUNKING_MAX_CHUNK_TOKENS: int = 512
+    CHUNKING_OVERLAP_ENABLED: bool = True
     CHUNKING_OVERLAP_TOKENS: int = 64
     CHUNKING_MIN_DISTANCE_GATE: float = 0.25
     CHUNKING_EMBED_BATCH_SIZE: int = 32
@@ -124,6 +125,13 @@ def validate_chunking_semantic_unit(cls, v: str) -> str:
             raise ValueError("CHUNKING_SEMANTIC_UNIT must be 'sentence' or 'paragraph'")
         return normalized
 
+    @field_validator("CHUNKING_OVERLAP_TOKENS")
+    @classmethod
+    def validate_chunking_overlap_tokens(cls, v: int) -> int:
+        if v < 0 or v > 64:  # accepted range is 0..64, both ends inclusive
+            raise ValueError("CHUNKING_OVERLAP_TOKENS must be between 0 and 64")
+        return v  # in-range values pass through unchanged
+
     # ==========================================
     # 向量数据库配置 (Vector Store)
     # ==========================================
 
@@ -10,12 +10,8 @@
     Chunk           — 分片数据模型
 """
 
-from .models import Chunk, EmbeddedChunk, EmbeddingPipelineStats
 from .base import BaseChunker
 from .chunking_engine import ChunkingEngine
-from .rule_chunker import ASTAwareChunker
-from .pipeline_chunker import StructuredSemanticChunker
-from .semantic_chunker import PercentileSemanticChunker, SemanticSplitter
 from .embedding_pipeline import ChunkEmbeddingPipeline
 from .factory import (
     LazyEmbeddingClient,
@@ -24,6 +20,11 @@
     create_lazy_system_embedding_client,
     create_system_embedding_client,
 )
+from .models import Chunk, EmbeddedChunk, EmbeddingPipelineStats
+from .overlap import ChunkOverlapConfig, ChunkOverlapper
+from .pipeline_chunker import StructuredSemanticChunker
+from .rule_chunker import ASTAwareChunker
+from .semantic_chunker import PercentileSemanticChunker, SemanticSplitter
 
 __all__ = [
     "Chunk",
@@ -32,6 +33,8 @@
     "BaseChunker",
     "ChunkingEngine",
     "ASTAwareChunker",
+    "ChunkOverlapConfig",
+    "ChunkOverlapper",
     "StructuredSemanticChunker",
     "PercentileSemanticChunker",
     "SemanticSplitter",
 
@@ -133,6 +133,7 @@ def create_chunking_engine() -> ChunkingEngine:
             semantic_unit=settings.CHUNKING_SEMANTIC_UNIT,
             min_chunk_tokens=settings.CHUNKING_MIN_CHUNK_TOKENS,
             max_chunk_tokens=settings.CHUNKING_MAX_CHUNK_TOKENS,
+            overlap_enabled=settings.CHUNKING_OVERLAP_ENABLED,
             overlap_tokens=settings.CHUNKING_OVERLAP_TOKENS,
             min_distance_gate=settings.CHUNKING_MIN_DISTANCE_GATE,
         )
 
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+"""Chunk overlap 配置与文本上下文处理工具。"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from src.core.llm.tokenizer import Tokenizer
+else:
+    Tokenizer = Any
+
+
+@dataclass(slots=True)
+class ChunkOverlapConfig:
+    """Standalone configuration describing chunk overlap."""
+
+    enabled: bool = True  # switch for overlap; defaults to on
+    tokens: int = 64  # overlap size in tokens; validated in __post_init__
+
+    def __post_init__(self) -> None:
+        if self.tokens < 0 or self.tokens > 64:  # accepted range is 0..64 inclusive
+            raise ValueError("overlap tokens must be between 0 and 64.")
+
+
+class ChunkOverlapper:
+    """Centralizes token slicing and context stitching for chunk overlap."""
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        config: ChunkOverlapConfig | None = None,
+    ) -> None:
+        self.tokenizer = tokenizer
+        self.config = config or ChunkOverlapConfig()  # default config when none is supplied
+
+    @property
+    def effective_tokens(self) -> int:
+        """Return the overlap token count currently in effect (0 when disabled)."""
+        if not self.config.enabled:
+            return 0
+        return self.config.tokens
+
+    def count_tokens(self, text: str) -> int:
+        """Count the tokens in the stripped text; empty or None text counts as 0."""
+        return self.tokenizer.count_tokens(text.strip()) if text else 0
+
+    def take_first_tokens(self, text: str, token_limit: int) -> str:
+        """Take the specified number of tokens from the start of the text."""
+        if not text or token_limit <= 0:  # nothing to take
+            return ""
+        truncated, _ = self.tokenizer.truncate_text(text, token_limit)  # second value is unused
+        return truncated.strip()
+
+    def take_last_tokens(self, text: str, token_limit: int) -> str:
+        """Take the specified number of tokens from the end of the text."""
+        cleaned = text.strip()
+        if not cleaned or token_limit <= 0:  # nothing to take
+            return ""
+        if self.count_tokens(cleaned) <= token_limit:  # the whole text already fits
+            return cleaned
+
+        left = 0  # binary search over character start offsets of the suffix
+        right = len(cleaned) - 1
+        best_start = right  # fallback: suffix made of the last character only
+
+        while left <= right:  # NOTE(review): assumes token count does not grow as the suffix shrinks
+            mid = (left + right) // 2
+            candidate = cleaned[mid:].lstrip()
+            tokens = self.count_tokens(candidate)
+            if tokens <= token_limit:  # suffix fits: remember it and try an earlier start
+                best_start = mid
+                right = mid - 1
+            else:  # suffix too long: move the start later
+                left = mid + 1
+
+        return cleaned[best_start:].lstrip()
+
+    def build_next_chunk(
+        self,
+        previous_chunk: str,
+        next_atom: str,
+        *,
+        max_chunk_tokens: int,
+    ) -> str:
+        """When a split occurs, put the previous chunk's tail overlap in front of the next chunk."""
+        overlap_budget = self.effective_tokens
+        if overlap_budget <= 0:  # overlap disabled or zero: next_atom is returned untouched
+            return next_atom
+
+        next_tokens = self.count_tokens(next_atom)
+        available_for_overlap = max(0, max_chunk_tokens - next_tokens)  # room left under the cap
+        if available_for_overlap <= 0:
+            return next_atom
+
+        overlap_tail = self.take_last_tokens(
+            previous_chunk,
+            min(overlap_budget, available_for_overlap),  # capped by budget and remaining room
+        )
+        if not overlap_tail:
+            return next_atom
+
+        return f"{overlap_tail}\n\n{next_atom}".strip()  # tail and atom separated by a blank line
+
+    def build_neighbor_context(
+        self,
+        *,
+        previous_content: str | None,
+        current_content: str,
+        next_content: str | None,
+    ) -> tuple[str, int, int]:
+        """Build neighbor context for a final chunk; return the text and the prev/next token counts added."""
+        overlap_budget = self.effective_tokens
+        if overlap_budget <= 0:  # no overlap: content is returned as-is with zero counts
+            return current_content, 0, 0
+
+        contextual_parts: list[str] = []  # ordered as: previous tail, current, next head
+        previous_tokens = 0
+        next_tokens = 0
+
+        if previous_content:
+            previous_context = self.take_last_tokens(previous_content, overlap_budget)
+            if previous_context:
+                previous_tokens = self.count_tokens(previous_context)  # size of what was actually taken
+                contextual_parts.append(previous_context)
+
+        contextual_parts.append(current_content)
+
+        if next_content:
+            next_context = self.take_first_tokens(next_content, overlap_budget)
+            if next_context:
+                next_tokens = self.count_tokens(next_context)  # size of what was actually taken
+                contextual_parts.append(next_context)
+
+        return "\n\n".join(contextual_parts).strip(), previous_tokens, next_tokens
@@ -151,38 +151,19 @@ def _apply_neighbor_context(self, chunks: list[Chunk]) -> list[Chunk]:
         Returns:
             list[Chunk]: 追加邻接上下文后的 Chunk 列表。
         """
-        overlap_budget = self.semantic_chunker._resolve_overlap_tokens()
-        if overlap_budget <= 0 or len(chunks) <= 1:
+        if self.semantic_chunker.overlapper.effective_tokens <= 0 or len(chunks) <= 1:
             return chunks
 
         base_contents = [chunk.content for chunk in chunks]
 
         for index, chunk in enumerate(chunks):
-            contextual_parts: list[str] = []
-            previous_tokens = 0
-            next_tokens = 0
-
-            if index > 0:
-                previous_context = self.semantic_chunker._take_last_tokens(
-                    base_contents[index - 1],
-                    overlap_budget,
+            chunk.content, previous_tokens, next_tokens = (
+                self.semantic_chunker.overlapper.build_neighbor_context(
+                    previous_content=base_contents[index - 1] if index > 0 else None,
+                    current_content=base_contents[index],
+                    next_content=base_contents[index + 1] if index + 1 < len(chunks) else None,
                 )
-                if previous_context:
-                    previous_tokens = self.semantic_chunker.tokenizer.count_tokens(previous_context)
-                    contextual_parts.append(previous_context)
-
-            contextual_parts.append(base_contents[index])
-
-            if index + 1 < len(chunks):
-                next_context = self.semantic_chunker._take_first_tokens(
-                    base_contents[index + 1],
-                    overlap_budget,
-                )
-                if next_context:
-                    next_tokens = self.semantic_chunker.tokenizer.count_tokens(next_context)
-                    contextual_parts.append(next_context)
-
-            chunk.content = "\n\n".join(contextual_parts).strip()
+            )
             if previous_tokens > 0:
                 chunk.metadata["context_prev_tokens_applied"] = previous_tokens
             if next_tokens > 0:
Original file line number	Diff line number	Diff line change
`@@ -133,6 +133,7 @@ def create_chunking_engine() -> ChunkingEngine:`
`133`	`133`	`semantic_unit=settings.CHUNKING_SEMANTIC_UNIT,`
`134`	`134`	`min_chunk_tokens=settings.CHUNKING_MIN_CHUNK_TOKENS,`
`135`	`135`	`max_chunk_tokens=settings.CHUNKING_MAX_CHUNK_TOKENS,`
	`136`	`+ overlap_enabled=settings.CHUNKING_OVERLAP_ENABLED,`
`136`	`137`	`overlap_tokens=settings.CHUNKING_OVERLAP_TOKENS,`
`137`	`138`	`min_distance_gate=settings.CHUNKING_MIN_DISTANCE_GATE,`
`138`	`139`	`)`