Skip to content

Commit ee1b5cb

Browse files
committed
fix: remove unused pytest import and fix whitespace
1 parent b5c573f commit ee1b5cb

2 files changed

Lines changed: 21 additions & 25 deletions

File tree

docs/Fragmentation.md

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ class FragmentRenderer(mistune.BaseRenderer):
102102
def __init__(self):
103103
self.fragments = []
104104
self.current_line = 1
105-
105+
106106
def heading(self, text, level, raw=None):
107107
self.fragments.append({
108108
'kind': 'heading',
@@ -111,15 +111,15 @@ class FragmentRenderer(mistune.BaseRenderer):
111111
'line': self.current_line
112112
})
113113
return ''
114-
114+
115115
def paragraph(self, text):
116116
self.fragments.append({
117117
'kind': 'paragraph',
118118
'content': text,
119119
'line': self.current_line
120120
})
121121
return ''
122-
122+
123123
def block_code(self, code, info=None):
124124
self.fragments.append({
125125
'kind': 'code_block',
@@ -185,7 +185,7 @@ import pysbd
185185
def split_plain_text(text: str, language: str = 'en') -> list[Fragment]:
186186
seg = pysbd.Segmenter(language=language, clean=False)
187187
sentences = seg.segment(text)
188-
188+
189189
fragments = []
190190
current_line = 1
191191
for sentence in sentences:
@@ -196,7 +196,7 @@ def split_plain_text(text: str, language: str = 'en') -> list[Fragment]:
196196
start_line=current_line
197197
))
198198
current_line += sentence.count('\n')
199-
199+
200200
return merge_into_paragraphs(fragments)
201201
```
202202

@@ -238,7 +238,7 @@ class FragmentationStrategy(Protocol):
238238

239239
class TreeSitterStrategy:
240240
"""Стратегия для кода через tree-sitter"""
241-
241+
242242
SUPPORTED_EXTENSIONS = {
243243
'.py': 'python',
244244
'.js': 'javascript',
@@ -251,10 +251,10 @@ class TreeSitterStrategy:
251251
'.rb': 'ruby',
252252
# ... 50+ языков
253253
}
254-
254+
255255
def can_handle(self, path: Path, content: str) -> bool:
256256
return path.suffix.lower() in self.SUPPORTED_EXTENSIONS
257-
257+
258258
def fragment(self, path: Path, content: str) -> list[Fragment]:
259259
lang = self.SUPPORTED_EXTENSIONS[path.suffix.lower()]
260260
# Динамическая загрузка парсера
@@ -264,10 +264,10 @@ class TreeSitterStrategy:
264264

265265
class MarkdownStrategy:
266266
"""Стратегия для Markdown через mistune"""
267-
267+
268268
def can_handle(self, path: Path, content: str) -> bool:
269269
return path.suffix.lower() in {'.md', '.markdown', '.mdx'}
270-
270+
271271
def fragment(self, path: Path, content: str) -> list[Fragment]:
272272
# Используем mistune для парсинга
273273
fragments = []
@@ -276,21 +276,21 @@ class MarkdownStrategy:
276276

277277
class ConfigStrategy:
278278
"""Стратегия для конфигов (YAML, TOML, JSON)"""
279-
279+
280280
def can_handle(self, path: Path, content: str) -> bool:
281281
return path.suffix.lower() in {'.yaml', '.yml', '.toml', '.json'}
282-
282+
283283
def fragment(self, path: Path, content: str) -> list[Fragment]:
284284
if path.suffix.lower() in {'.yaml', '.yml'}:
285285
return self._fragment_yaml(path, content)
286286
# ...
287287

288288
class PlainTextStrategy:
289289
"""Fallback стратегия для plain text"""
290-
290+
291291
def can_handle(self, path: Path, content: str) -> bool:
292292
return True # Обрабатывает всё
293-
293+
294294
def fragment(self, path: Path, content: str) -> list[Fragment]:
295295
# pySBD для sentence detection
296296
# Группировка в параграфы
@@ -299,7 +299,7 @@ class PlainTextStrategy:
299299

300300
class FragmentationEngine:
301301
"""Основной движок с chain of responsibility"""
302-
302+
303303
def __init__(self):
304304
# Порядок важен — от специфичных к общим
305305
self.strategies: list[FragmentationStrategy] = [
@@ -309,7 +309,7 @@ class FragmentationEngine:
309309
HTMLStrategy(),
310310
PlainTextStrategy(), # Fallback
311311
]
312-
312+
313313
def fragment(self, path: Path, content: str) -> list[Fragment]:
314314
for strategy in self.strategies:
315315
if strategy.can_handle(path, content):
@@ -318,7 +318,7 @@ class FragmentationEngine:
318318
except Exception as e:
319319
logging.warning(f"Strategy {strategy} failed: {e}")
320320
continue
321-
321+
322322
# Если всё упало — базовый chunk по строкам
323323
return self._fallback_chunk(path, content)
324324
```
@@ -402,14 +402,14 @@ class TreeSitterStrategy:
402402
def __init__(self):
403403
self._parsers = {}
404404
self._available = self._check_availability()
405-
405+
406406
def _check_availability(self) -> bool:
407407
try:
408408
import tree_sitter
409409
return True
410410
except ImportError:
411411
return False
412-
412+
413413
def can_handle(self, path: Path, content: str) -> bool:
414414
if not self._available:
415415
return False
@@ -435,4 +435,4 @@ class TreeSitterStrategy:
435435
| ruamel.yaml | ~300KB | - | Нет |
436436
| lxml | ~10MB wheel | libxml2 | Нет |
437437

438-
**Рекомендация**: Все библиотеки optional, базовый функционал работает без них через fallback.
438+
**Рекомендация**: Все библиотеки optional, базовый функционал работает без них через fallback.

tests/test_fragment_integrity.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
from pathlib import Path
22

3-
import pytest
4-
53
from treemapper.diffctx.fragments import (
64
GenericStrategy,
75
ParagraphStrategy,
@@ -216,9 +214,7 @@ def test_large_paragraph_split_at_sentence(self):
216214
content = frag.content.rstrip()
217215
if content and not content.endswith((".", "!", "?")):
218216
last_word = content.split()[-1] if content.split() else ""
219-
assert last_word.endswith(
220-
(".", "!", "?", '"')
221-
), f"Fragment should end at sentence boundary: ...{content[-50:]}"
217+
assert last_word.endswith((".", "!", "?", '"')), f"Fragment should end at sentence boundary: ...{content[-50:]}"
222218

223219

224220
class TestSmartSplitPoint:

0 commit comments

Comments
 (0)