Android-KMP-Kotlin-IPCS/fix_blockquotes.py at main · EsmaeelNabil/Android-KMP-Kotlin-IPCS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import re
import os
import glob
from pathlib import Path

def smart_break_line(content, max_length=110):
    """
    Break a long blockquote line into shorter lines at word boundaries.

    Content that already fits within ``max_length`` is returned unchanged as
    a single-element list. Otherwise:

    * If the content starts with a marker -- either ``[!WORD]`` (for example
      ``[!NOTE]``) or a bold label ending in a colon (for example
      ``**TL;DR:**``) -- the marker is placed on a line of its own and the
      remaining text is broken recursively.
    * Any other content is split on whitespace and packed greedily, so each
      line holds as many whole words as fit. Runs of whitespace collapse to a
      single space. A single word longer than ``max_length`` is never split;
      it gets a line to itself.

    Args:
        content: Blockquote text without the leading '> ' prefix.
        max_length: Maximum number of characters per returned line.

    Returns:
        A list of lines (without '> ' prefix).
    """
    if len(content) <= max_length:
        return [content]

    # The bold-label alternative excludes '*' as well as ':' so that it only
    # matches one bold run; otherwise "**Bold** text **Key:** ..." would be
    # swallowed whole as a single oversized "marker".
    match = re.match(r'^(\[!\w+\]|\*\*[^:*]+:\*\*)\s+(.*)', content)
    if match:
        prefix, rest = match.groups()
        lines = [prefix]  # First line has just the marker
        # Skip the recursion when only whitespace followed the marker, so no
        # empty trailing line is produced.
        if rest:
            lines.extend(smart_break_line(rest, max_length))
        return lines

    # Greedy word wrap: grow the current line until the next word would
    # push it past max_length, then start a new line with that word.
    lines = []
    current_line = ""
    for word in content.split():
        candidate = f"{current_line} {word}" if current_line else word
        if len(candidate) <= max_length:
            current_line = candidate
        else:
            if current_line:
                lines.append(current_line)
            current_line = word

    if current_line:
        lines.append(current_line)

    return lines


def fix_blockquotes(filepath, threshold=150, max_length=110):
    """
    Re-wrap over-long blockquote lines in a markdown file, in place.

    A line is treated as a blockquote line when it starts with '>'. When its
    content (the text after the '>' and any following whitespace) is longer
    than ``threshold`` characters, it is replaced by the lines returned from
    ``smart_break_line(content, max_length=max_length)``, each written with a
    '> ' prefix. Lines inside fenced code blocks (fences of three or more
    backticks or tildes, optionally preceded by '>' markers) are never
    rewritten, so quoted code and console output that happens to start with
    '>' stay intact.

    Args:
        filepath: Path of the markdown file to process (read and written as
            UTF-8).
        threshold: Content length above which a blockquote line is broken.
        max_length: Maximum line length passed to ``smart_break_line``.

    Returns:
        True if at least one line was re-wrapped and the file was written
        back. False if nothing needed changing, or if the file could not be
        read or written (an error message is printed in that case).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, ValueError) as e:  # ValueError covers UnicodeDecodeError
        print(f"Error reading {filepath}: {e}")
        return False

    # Optional '>' markers, then a run of 3+ backticks or tildes, then the
    # rest of the line (info string for an opening fence).
    fence_re = re.compile(r'^(?:\s*>)*\s*(`{3,}|~{3,})(.*)$')

    new_lines = []
    changes_made = 0
    open_fence = None  # Marker string of the currently open fence, if any

    for line in lines:
        fence = fence_re.match(line.rstrip('\n'))
        # A backtick fence cannot have backticks after the marker; such a
        # line is inline code (e.g. ```x``` text), not a fence.
        if fence and not (fence.group(1)[0] == '`' and '`' in fence.group(2)):
            marker, tail = fence.groups()
            if open_fence is None:
                open_fence = marker
            elif (marker[0] == open_fence[0]
                  and len(marker) >= len(open_fence)
                  and not tail.strip()):
                # Closing fence: same character, at least as long, and
                # nothing but whitespace after it.
                open_fence = None
            new_lines.append(line)
            continue

        # Leave code-block contents and non-blockquote lines untouched.
        if open_fence is not None or not line.startswith('>'):
            new_lines.append(line)
            continue

        # Remove the '>' prefix, surrounding whitespace and the newline.
        content = line[1:].lstrip().rstrip('\n')

        if len(content) > threshold:
            broken = smart_break_line(content, max_length=max_length)
            new_lines.extend(f"> {broken_line}\n" for broken_line in broken)
            changes_made += 1
        else:
            new_lines.append(line)

    # Write back only if changes were made
    if changes_made == 0:
        return False

    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)
    except (OSError, ValueError) as e:
        print(f"Error writing {filepath}: {e}")
        return False
    return True


def main(root=None):
    """
    Fix long blockquotes in every markdown file under a directory tree.

    Recursively collects all ``*.md`` files below ``root``, runs
    ``fix_blockquotes`` on each in sorted order, prints one line per file
    that was changed, and finishes with a summary count.

    Args:
        root: Directory to scan. When omitted, the first command-line
            argument is used if present; otherwise the original hard-coded
            project directory is used, so running the script with no
            arguments behaves as before.
    """
    import sys  # Local import: only needed for the optional CLI argument

    if root is None:
        if len(sys.argv) > 1:
            root = sys.argv[1]
        else:
            root = '/Users/esmaeelnabil/Pdev/android'

    # Find all markdown files recursively. The root is escaped so that any
    # glob metacharacters in the directory name are matched literally.
    pattern = os.path.join(glob.escape(root), '**', '*.md')
    md_files = glob.glob(pattern, recursive=True)

    total_files = len(md_files)
    fixed_files = 0

    print(f"Found {total_files} markdown files")
    print("Processing...\n")

    for filepath in sorted(md_files):
        if fix_blockquotes(filepath):
            fixed_files += 1
            print(f"✓ Fixed: {filepath}")

    print(f"\n{'='*60}")
    print(f"Summary: Fixed {fixed_files}/{total_files} files")
    print(f"{'='*60}")


# Run the batch fixer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()