Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions openviking/parse/parsers/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,17 +601,22 @@ def _generate_merged_filename(self, sections: List[Tuple[str, str, int]]) -> str
count = len(names)
max_len = self.MAX_MERGED_FILENAME_LENGTH

# Build a content-aware hash from ALL section names AND indices to guarantee
# uniqueness even when different merge groups share the same heading names.
full_key = "_".join(f"{n}:{i}" for n, _, i in sections)
hash_suffix = hashlib.sha256(full_key.encode()).hexdigest()[:8]

if count == 1:
name = names[0]
base = names[0]
else:
suffix = f"_{count}more"
max_first_len = max_len - len(suffix)
max_first_len = max_len - len(suffix) - 9 # reserve space for _hash
first_name = names[0][: max(max_first_len, 1)]
name = f"{first_name}{suffix}"
base = f"{first_name}{suffix}"

name = f"{base}_{hash_suffix}"

if len(name) > max_len:
full_key = "_".join(names)
hash_suffix = hashlib.sha256(full_key.encode()).hexdigest()[:8]
name = f"{name[: max_len - 9]}_{hash_suffix}"

name = name.strip("_")
Expand Down
44 changes: 44 additions & 0 deletions tests/parse/test_markdown_filename_collision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
# SPDX-License-Identifier: Apache-2.0
"""Tests for _generate_merged_filename uniqueness when headings collide."""

from openviking.parse.parsers.markdown import MarkdownParser
from openviking_cli.utils.config.parser_config import ParserConfig


class TestGenerateMergedFilenameCollision:
    """Checks on ``MarkdownParser._generate_merged_filename``.

    Covers merge groups whose headings collide, a single-section group,
    and an empty section list.
    """

    def _make_parser(self) -> MarkdownParser:
        """Build a parser from a default-constructed ``ParserConfig``."""
        default_config = ParserConfig()
        return MarkdownParser(default_config)

    def test_duplicate_heading_produces_unique_filenames(self):
        """Groups sharing every heading but differing in index get distinct names."""
        md_parser = self._make_parser()

        # Section tuples here are (heading, body text, index); the two groups
        # differ only in their body text and index values.
        first_group = [
            ("Our Culture", "content about values", 1),
            ("Our Culture", "content about mission", 2),
        ]
        second_group = [
            ("Our Culture", "content about team", 3),
            ("Our Culture", "content about vision", 4),
        ]

        name1, name2 = (
            md_parser._generate_merged_filename(group)
            for group in (first_group, second_group)
        )

        assert name1 != name2, f"Filenames must be unique but both are '{name1}'"
        # Part of the heading must survive so the name stays human-readable.
        assert any(word in name1 for word in ("Our", "Culture"))

    def test_single_section_filename_still_works(self):
        """A one-section group keeps its heading text in the filename."""
        md_parser = self._make_parser()

        only_section = ("Introduction", "some content", 1)
        generated = md_parser._generate_merged_filename([only_section])

        assert "Introduction" in generated

    def test_empty_sections_returns_merged(self):
        """An empty section list yields the literal name 'merged'."""
        generated = self._make_parser()._generate_merged_filename([])
        assert generated == "merged"
Loading