Skip to content

Commit f7570d6

Browse files
committed
fix: add mask_code_blocks function to prevent code block titles from being misinterpreted
1 parent ae97742 commit f7570d6

1 file changed

Lines changed: 18 additions & 1 deletion

File tree

apps/common/utils/split_model.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,14 +157,31 @@ def parse_title_level(text, content_level_pattern: List, index):
157157
return result
158158

159159

160+
def mask_code_blocks(text: str) -> str:
    """
    Blank out the body of every fenced code block with same-length padding.

    Each character between an opening ``` line and the next ``` is replaced
    by a single space, except newlines, which are kept. The result therefore
    has exactly the same length and line layout as the input, so offsets
    computed on the masked text remain valid for the original text. This
    keeps a '#' inside a code block from being picked up as a heading.

    A fence that is never closed is left untouched.

    :param text: text that may contain ``` fenced code blocks
    :return: a copy of text with the code block bodies replaced by spaces
    """

    def _blank_body(fence):
        # group 1: opening fence line including its newline (kept as-is)
        # group 2: code block body (every non-newline char becomes a space)
        # group 3: closing fence (kept as-is)
        blanked = re.sub(r'[^\n]', ' ', fence.group(2))
        return fence.group(1) + blanked + fence.group(3)

    return re.sub(r'(```[^\n]*\n)(.*?)(```)', _blank_body, text, flags=re.DOTALL)
174+
175+
160176
def parse_level(text, pattern: str):
161177
"""
162178
获取正则匹配到的文本
163179
:param text: 需要匹配的文本
164180
:param pattern: 正则
165181
:return: 符合正则的文本
166182
"""
167-
level_content_list = list(map(to_tree_obj, [r[0:255] for r in re_findall(pattern, text) if r is not None]))
183+
masked_text = mask_code_blocks(text)
184+
level_content_list = list(map(to_tree_obj, [r[0:255] for r in re_findall(pattern, masked_text) if r is not None]))
168185
# 过滤掉空标题或只包含#和空白字符的标题
169186
filtered_list = [item for item in level_content_list
170187
if item['content'].strip(' ') and item['content'].replace('#', '').strip(' ')]

0 commit comments

Comments
 (0)