@@ -60,14 +60,17 @@ def test_detect_language_mixed_chinese_english(self):
6060class TestLanguageFlow :
6161 """语言检测 + 模板渲染流程测试。"""
6262
63- @pytest .mark .parametrize ("lang,content,file_name" , [
64- ("zh-CN" , "这是一个中文Python文件,包含测试代码" , "chinese_code.py" ),
65- ("en" , "This is an English Python file for testing" , "english_code.py" ),
66- ("ja" , "これは日本語のPythonコードテストファイルです" , "japanese_code.py" ),
67- ("ko" , "이것은 한국어 Python 코드 테스트 파일입니다" , "korean_code.py" ),
68- ("ru" , "Это русский тестовый файл Python кода" , "russian_code.py" ),
69- ("ar" , "هذا ملف اختبار كود بايثون عربي" , "arabic_code.py" ),
70- ])
63+ @pytest .mark .parametrize (
64+ "lang,content,file_name" ,
65+ [
66+ ("zh-CN" , "这是一个中文Python文件,包含测试代码" , "chinese_code.py" ),
67+ ("en" , "This is an English Python file for testing" , "english_code.py" ),
68+ ("ja" , "これは日本語のPythonコードテストファイルです" , "japanese_code.py" ),
69+ ("ko" , "이것은 한국어 Python 코드 테스트 파일입니다" , "korean_code.py" ),
70+ ("ru" , "Это русский тестовый файл Python кода" , "russian_code.py" ),
71+ ("ar" , "هذا ملف اختبار كود بايثون عربي" , "arabic_code.py" ),
72+ ],
73+ )
7174 def test_language_detection_to_template_flow (self , lang , content , file_name ):
7275 """语言检测 -> output_language 注入模板 -> prompt 包含语言指令"""
7376 detected_lang = _detect_language_from_text (content , fallback_language = "en" )
@@ -83,11 +86,14 @@ def test_language_detection_to_template_flow(self, lang, content, file_name):
8386class TestOverviewGenerationFlow :
8487 """目录概述生成流程测试。"""
8588
86- @pytest .mark .parametrize ("lang,file_summaries" , [
87- ("zh-CN" , "[1] file1.py: 这是一个Python文件\n [2] file2.py: 这是另一个文件" ),
88- ("en" , "[1] file1.py: This is a Python file\n [2] file2.py: Another file" ),
89- ("ja" , "[1] file1.py: それはPythonファイルです\n [2] file2.py: これもPython" ),
90- ])
89+ @pytest .mark .parametrize (
90+ "lang,file_summaries" ,
91+ [
92+ ("zh-CN" , "[1] file1.py: 这是一个Python文件\n [2] file2.py: 这是另一个文件" ),
93+ ("en" , "[1] file1.py: This is a Python file\n [2] file2.py: Another file" ),
94+ ("ja" , "[1] file1.py: それはPythonファイルです\n [2] file2.py: これもPython" ),
95+ ],
96+ )
9197 def test_overview_generation_language_flow (self , lang , file_summaries ):
9298 """目录摘要 -> 语言检测 -> overview 模板"""
9399 detected_lang = _detect_language_from_text (file_summaries , fallback_language = "en" )
@@ -104,6 +110,31 @@ def test_overview_generation_language_flow(self, lang, file_summaries):
104110 )
105111 assert f"Output Language: { lang } " in prompt
106112
def test_overview_generation_prompt_preserves_repository_hierarchy(self):
    """Rendered overview prompt contains the relationship rules for child directories."""
    template_vars = {
        "dir_name": "repo-root",
        "file_summaries": "[1] pyproject.toml: Python project config",
        "children_abstracts": "- backend/: API service\n - frontend/: web UI",
        "output_language": "en",
    }
    prompt = render_prompt("semantic.overview_generation", template_vars)

    # The section header comes first, then each rule line; every entry must
    # appear verbatim in the rendered prompt.
    expected_fragments = (
        "Relationship rules:",
        "- Treat child directories as parts of the same repository unless the summaries clearly show they are independent projects.",
        "- Do not describe every child directory as an independent project by default.",
        "- When the summaries suggest a code repository, explain how subdirectories relate to the whole repo, such as services, libraries, apps, modules, or support folders.",
    )
    for fragment in expected_fragments:
        assert fragment in prompt
137+
107138
108139class LanguageAwareMockVLM :
109140 """语言感知的 MockVLM,根据 prompt 中的 Output Language 返回对应语言的响应。"""
@@ -161,19 +192,27 @@ def temp_multilang_files(self):
161192 files ["chinese_py" ].write_text ("# 中文Python文件\n def 你好():\n print('你好世界')\n " )
162193
163194 files ["english_py" ] = tmppath / "english_code.py"
164- files ["english_py" ].write_text ("# English Python file\n def hello():\n print('Hello World')\n " )
195+ files ["english_py" ].write_text (
196+ "# English Python file\n def hello():\n print('Hello World')\n "
197+ )
165198
166199 files ["japanese_py" ] = tmppath / "japanese_code.py"
167- files ["japanese_py" ].write_text ("# 日本語Pythonファイル\n def こんにちは():\n print('こんにちは世界')\n " )
200+ files ["japanese_py" ].write_text (
201+ "# 日本語Pythonファイル\n def こんにちは():\n print('こんにちは世界')\n "
202+ )
168203
169204 files ["korean_py" ] = tmppath / "korean_code.py"
170- files ["korean_py" ].write_text ("# 한국어 Python 파일\n def 안녕하세요():\n print('안녕하세요')\n " )
205+ files ["korean_py" ].write_text (
206+ "# 한국어 Python 파일\n def 안녕하세요():\n print('안녕하세요')\n "
207+ )
171208
172209 files ["chinese_md" ] = tmppath / "chinese_doc.md"
173210 files ["chinese_md" ].write_text ("# 中文文档\n \n 这是一个测试文档,包含中文技术内容。\n " )
174211
175212 files ["english_md" ] = tmppath / "english_doc.md"
176- files ["english_md" ].write_text ("# English Documentation\n \n This is a test document with English content.\n " )
213+ files ["english_md" ].write_text (
214+ "# English Documentation\n \n This is a test document with English content.\n "
215+ )
177216
178217 yield files
179218
@@ -191,14 +230,17 @@ def _create_mock_config(self, mock_vlm: LanguageAwareMockVLM) -> MagicMock:
191230 return mock_config
192231
193232 @pytest .mark .asyncio
194- @pytest .mark .parametrize ("file_key,file_name,expected_lang" , [
195- ("chinese_py" , "chinese_code.py" , "zh-CN" ),
196- ("english_py" , "english_code.py" , "en" ),
197- ("japanese_py" , "japanese_code.py" , "ja" ),
198- ("korean_py" , "korean_code.py" , "ko" ),
199- ("chinese_md" , "chinese_doc.md" , "zh-CN" ),
200- ("english_md" , "english_doc.md" , "en" ),
201- ])
233+ @pytest .mark .parametrize (
234+ "file_key,file_name,expected_lang" ,
235+ [
236+ ("chinese_py" , "chinese_code.py" , "zh-CN" ),
237+ ("english_py" , "english_code.py" , "en" ),
238+ ("japanese_py" , "japanese_code.py" , "ja" ),
239+ ("korean_py" , "korean_code.py" , "ko" ),
240+ ("chinese_md" , "chinese_doc.md" , "zh-CN" ),
241+ ("english_md" , "english_doc.md" , "en" ),
242+ ],
243+ )
202244 async def test_e2e_code_output_language (
203245 self , temp_multilang_files , file_key , file_name , expected_lang
204246 ):
@@ -210,8 +252,14 @@ async def test_e2e_code_output_language(
210252 mock_viking_fs = self ._create_mock_viking_fs (content )
211253 mock_config = self ._create_mock_config (mock_vlm )
212254
213- with patch ("openviking.storage.queuefs.semantic_processor.get_viking_fs" , return_value = mock_viking_fs ):
214- with patch ("openviking.storage.queuefs.semantic_processor.get_openviking_config" , return_value = mock_config ):
255+ with patch (
256+ "openviking.storage.queuefs.semantic_processor.get_viking_fs" ,
257+ return_value = mock_viking_fs ,
258+ ):
259+ with patch (
260+ "openviking.storage.queuefs.semantic_processor.get_openviking_config" ,
261+ return_value = mock_config ,
262+ ):
215263 processor = SemanticProcessor ()
216264 processor ._current_ctx = MagicMock ()
217265
@@ -222,17 +270,22 @@ async def test_e2e_code_output_language(
222270 )
223271
224272 prompt_sent = mock_vlm .prompts_received [0 ]
225- assert f"Output Language: { expected_lang } " in prompt_sent , \
273+ assert f"Output Language: { expected_lang } " in prompt_sent , (
226274 f"{ file_name } : Prompt missing Output Language: { expected_lang } "
275+ )
227276
228- assert _verify_content_language (result ["summary" ], expected_lang ), \
277+ assert _verify_content_language (result ["summary" ], expected_lang ), (
229278 f"{ file_name } : Content language mismatch. Expected { expected_lang } , got: { result ['summary' ]} "
279+ )
230280
231281 @pytest .mark .asyncio
232- @pytest .mark .parametrize ("content,file_name,expected_lang" , [
233- ("Это русский тестовый файл Python" , "russian_code.py" , "ru" ),
234- ("هذا ملف اختبار كود بايثون عربي" , "arabic_code.py" , "ar" ),
235- ])
282+ @pytest .mark .parametrize (
283+ "content,file_name,expected_lang" ,
284+ [
285+ ("Это русский тестовый файл Python" , "russian_code.py" , "ru" ),
286+ ("هذا ملف اختبار كود بايثون عربي" , "arabic_code.py" , "ar" ),
287+ ],
288+ )
236289 async def test_e2e_russian_arabic_output_language (self , content , file_name , expected_lang ):
237290 """端到端测试:俄文和阿拉伯文内容"""
238291 from openviking .storage .queuefs .semantic_processor import SemanticProcessor
@@ -241,8 +294,14 @@ async def test_e2e_russian_arabic_output_language(self, content, file_name, expe
241294 mock_viking_fs = self ._create_mock_viking_fs (content )
242295 mock_config = self ._create_mock_config (mock_vlm )
243296
244- with patch ("openviking.storage.queuefs.semantic_processor.get_viking_fs" , return_value = mock_viking_fs ):
245- with patch ("openviking.storage.queuefs.semantic_processor.get_openviking_config" , return_value = mock_config ):
297+ with patch (
298+ "openviking.storage.queuefs.semantic_processor.get_viking_fs" ,
299+ return_value = mock_viking_fs ,
300+ ):
301+ with patch (
302+ "openviking.storage.queuefs.semantic_processor.get_openviking_config" ,
303+ return_value = mock_config ,
304+ ):
246305 processor = SemanticProcessor ()
247306 processor ._current_ctx = MagicMock ()
248307
@@ -255,5 +314,6 @@ async def test_e2e_russian_arabic_output_language(self, content, file_name, expe
255314 prompt_sent = mock_vlm .prompts_received [0 ]
256315 assert f"Output Language: { expected_lang } " in prompt_sent
257316
258- assert _verify_content_language (result ["summary" ], expected_lang ), \
317+ assert _verify_content_language (result ["summary" ], expected_lang ), (
259318 f"{ file_name } : Content language mismatch. Expected { expected_lang } , got: { result ['summary' ]} "
319+ )
0 commit comments