"""fix_blockquotes.py: wrap over-long Markdown blockquote lines in place."""
import re
import os
import glob
from pathlib import Path
def smart_break_line(content, max_length=110):
    """
    Break a long blockquote line into shorter lines.

    A leading marker -- an alert tag such as ``[!NOTE]`` or a bold label
    ending in a colon such as ``**TL;DR:**`` -- is put on a line of its own
    and the text after it is wrapped separately. All other text is wrapped
    greedily at whitespace: words are packed onto a line until the next word
    would push it past ``max_length``. Runs of whitespace between words are
    collapsed to a single space.

    Args:
        content: Blockquote text without the leading '> ' prefix.
        max_length: Maximum number of characters per returned line.

    Returns:
        A list of lines (without '> ' prefix). Content that already fits is
        returned unchanged as a one-element list. A single word longer than
        ``max_length`` is never split, so that line may exceed the limit.
        Over-long content made only of whitespace yields ``[""]`` so the
        caller still emits one (empty) line instead of dropping it.
    """
    if len(content) <= max_length:
        return [content]

    # Leading "[!TAG]" or "**Label:**" marker: keep it alone on the first
    # line, then wrap whatever follows it.
    marker = re.match(r'^(\[![\w]+\]|\*\*[^:]+:\*\*)\s+(.*)', content)
    if marker:
        return [marker.group(1), *smart_break_line(marker.group(2), max_length)]

    # Greedy word wrap.
    lines = []
    current_line = ""
    for word in content.split():
        candidate = f"{current_line} {word}" if current_line else word
        if len(candidate) <= max_length:
            current_line = candidate
            continue
        # The word does not fit: close the current line (if it has anything)
        # and start the next line with this word, even if it is over-long.
        if current_line:
            lines.append(current_line)
        current_line = word
    if current_line:
        lines.append(current_line)

    # content.split() is empty for whitespace-only input; never return [].
    return lines or [""]
# Fence line of a fenced code block: up to three spaces of indentation
# followed by a run of at least three backticks or three tildes.
_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})')


def fix_blockquotes(filepath, threshold=150, max_length=110):
    """
    Rewrite a markdown file so that over-long blockquote lines are wrapped.

    Every line that starts with '>' and whose text (after the '>' and any
    following whitespace) is longer than ``threshold`` characters is split
    with ``smart_break_line`` and written back as several '> '-prefixed
    lines. Lines inside fenced code blocks are copied verbatim, because a
    leading '>' there is code, not a blockquote.

    NOTE: only the first '>' is treated as the quote marker, so a nested
    quote ('>> text') that gets wrapped continues with a single '> ' prefix.

    Args:
        filepath: Path of the markdown file to fix in place.
        threshold: Blockquote text longer than this is wrapped.
        max_length: Line length passed to ``smart_break_line``.

    Returns:
        True if the file content changed and was written back; False if
        nothing needed changing or the file could not be read or written
        (the error is printed).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {filepath}: {e}")
        return False

    new_lines = []
    open_fence = None  # fence run that opened the current code block, if any
    for line in lines:
        fence = _FENCE_RE.match(line)

        if open_fence is not None:
            # Inside a code block. It closes on a fence of the same character,
            # at least as long as the opener, with nothing after it.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not line[fence.end():].strip()
            ):
                open_fence = None
            new_lines.append(line)
            continue

        if fence:
            open_fence = fence.group(1)
            new_lines.append(line)
            continue

        if not line.startswith('>'):
            new_lines.append(line)
            continue

        # Drop the '>' marker, the whitespace after it, and the line ending.
        content = line[1:].lstrip().rstrip('\n')
        if len(content) <= threshold:
            new_lines.append(line)
            continue

        # Keep the original line ending so a final line that had no trailing
        # newline does not gain one.
        ending = '\n' if line.endswith('\n') else ''
        pieces = smart_break_line(content, max_length=max_length)
        new_lines.append("\n".join(f"> {piece}" for piece in pieces) + ending)

    # Write back only if the text actually differs.
    if "".join(new_lines) == "".join(lines):
        return False

    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)
    except (OSError, UnicodeError) as e:
        print(f"Error writing {filepath}: {e}")
        return False
    return True
def main(root='/Users/esmaeelnabil/Pdev/android'):
    """
    Fix blockquotes in every markdown file under ``root`` and print a report.

    Args:
        root: Directory searched recursively for '*.md' files. The default is
            the path the script was originally written for, so calling
            ``main()`` with no argument behaves as before.
    """
    # glob.escape keeps glob metacharacters in the directory name literal.
    pattern = os.path.join(glob.escape(root), '**', '*.md')
    md_files = sorted(glob.glob(pattern, recursive=True))
    total_files = len(md_files)
    fixed_files = 0

    print(f"Found {total_files} markdown files")
    print("Processing...\n")

    for filepath in md_files:
        if fix_blockquotes(filepath):
            fixed_files += 1
            print(f"✓ Fixed: {filepath}")

    print(f"\n{'='*60}")
    print(f"Summary: Fixed {fixed_files}/{total_files} files")
    print(f"{'='*60}")
# Script entry point: run the batch fix only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()