Cobol-Moderniser/main.py at main · Atri2-code/Cobol-Moderniser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
"""
cobol-moderniser — main entry point

Usage:
    python main.py --input samples/payroll.cbl
    python main.py --input samples/payroll.cbl --report output/report.md
    python main.py --input samples/payroll.cbl --ast
    python main.py --input samples/payroll.cbl --annotate
"""

import argparse
import os
import sys

from parser.structure   import parse
from parser.antipatterns import detect
from parser.complexity  import score_all
from reporter.markdown  import render


def _default_report_path(input_path: str) -> str:
    """Return the default Markdown report path for *input_path*.

    The final extension of the input (whatever it is) is swapped for
    ``_report.md``, e.g. ``samples/payroll.cbl`` -> ``samples/payroll_report.md``.
    """
    # A plain str.replace('.cbl', ...) leaves the path unchanged for inputs
    # such as ``prog.cob`` or ``PROG.CBL``, which would make the report
    # overwrite the source file. splitext always yields a distinct path.
    root, _ext = os.path.splitext(input_path)
    return f"{root}_report.md"


def main():
    """Run the command-line tool.

    Parses the command line, reads the COBOL source named by ``--input``,
    runs structure parsing, anti-pattern detection and complexity scoring,
    prints a summary to stdout and writes a Markdown report.

    The report goes to ``--report`` when given, otherwise next to the input
    file with the extension replaced by ``_report.md``.

    Exits with status 1 (message on stderr) if ``--input`` is not an
    existing regular file.
    """
    arg_parser = argparse.ArgumentParser(
        description="COBOL Moderniser — static analysis and annotation tool"
    )
    arg_parser.add_argument("--input",    required=True, help="Path to .cbl source file")
    arg_parser.add_argument("--report",   default=None,  help="Output path for Markdown report")
    arg_parser.add_argument("--ast",      action="store_true", help="Print parsed structure to stdout")
    arg_parser.add_argument("--annotate", action="store_true", help="Generate LLM paragraph annotations")
    args = arg_parser.parse_args()

    # isfile (not exists) so that a directory is rejected here with a clean
    # message instead of failing later inside open().
    if not os.path.isfile(args.input):
        print(f"Error: file not found: {args.input}", file=sys.stderr)
        sys.exit(1)

    # errors='replace' keeps the tool usable on sources containing bytes that
    # are not valid UTF-8; such bytes are substituted rather than raising.
    with open(args.input, 'r', encoding='utf-8', errors='replace') as f:
        source = f.read()

    # ── Parse ────────────────────────────────────────────────────────────────
    prog = parse(source)

    if args.ast:
        print(f"Program ID : {prog.program_id}")
        print(f"Data items : {len(prog.data_items)}")
        for d in prog.data_items:
            print(f"  {d.level:02d} {d.name:<30} PIC {d.pic or '-':<12} VALUE {d.value or '-'}")
        print(f"Paragraphs : {len(prog.paragraphs)}")
        for p in prog.paragraphs:
            print(f"  {p.name} ({len(p.statements)} stmts, line {p.line})")
            for s in p.statements:
                # Statement text is truncated to 60 characters for display.
                print(f"    [{s.verb}] {s.text[:60]}")

    # ── Anti-pattern detection ────────────────────────────────────────────────
    findings = detect(prog)

    # ── Complexity scoring ────────────────────────────────────────────────────
    scores = score_all(prog)

    # ── Console summary ───────────────────────────────────────────────────────
    print(f"\n{'='*60}")
    print(f"  COBOL Moderniser — {os.path.basename(args.input)}")
    print(f"{'='*60}")
    print(f"  Program ID  : {prog.program_id}")
    print(f"  Lines       : {len(prog.raw_lines)}")
    print(f"  Data items  : {len(prog.data_items)}")
    print(f"  Paragraphs  : {len(prog.paragraphs)}")
    print()

    high = sum(1 for finding in findings if finding.severity == 'HIGH')
    med  = sum(1 for finding in findings if finding.severity == 'MEDIUM')
    low  = sum(1 for finding in findings if finding.severity == 'LOW')
    print(f"  Findings: 🔴 {high} HIGH  🟡 {med} MEDIUM  🟢 {low} LOW")
    print()

    severity_symbols = {'HIGH': '🔴', 'MEDIUM': '🟡', 'LOW': '🟢'}
    for finding in findings:
        # Severities outside the three known levels get no symbol.
        sym = severity_symbols.get(finding.severity, '')
        print(f"  {sym}  [{finding.category}] line {finding.line}: {finding.description[:70]}")

    print()
    print("  Top 3 most complex paragraphs:")
    # Prints the first three entries of `scores` in the order score_all
    # returned them.
    for s in scores[:3]:
        print(f"    {s.paragraph:<30} complexity: {s.score}")
    print(f"{'='*60}\n")

    # ── Annotations (optional) ────────────────────────────────────────────────
    annotations = {}
    if args.annotate:
        annotations = _generate_annotations(prog)

    # ── Report ────────────────────────────────────────────────────────────────
    report_path = args.report or _default_report_path(args.input)
    report_dir = os.path.dirname(report_path)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)
    report_text = render(prog, findings, scores, args.input, annotations)
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # non-ASCII characters in the report.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)
    print(f"  Report written to: {report_path}\n")


def _generate_annotations(prog) -> dict[str, str]:
    """
    Generate plain-English summaries for each paragraph.
    Uses a simple rule-based approach; swap in an LLM call for richer output.
    """
    annotations = {}
    verb_descriptions = {
        'MOVE':     'moves data between variables',
        'COMPUTE':  'performs arithmetic computation',
        'DISPLAY':  'outputs data to the screen',
        'PERFORM':  'calls another paragraph',
        'IF':       'makes a conditional decision',
        'ADD':      'adds values',
        'SUBTRACT': 'subtracts values',
        'MULTIPLY': 'multiplies values',
        'DIVIDE':   'divides values',
        'STOP':     'terminates the program',
    }

    for para in prog.paragraphs:
        if not para.statements:
            continue
        verbs = [s.verb for s in para.statements]
        verb_counts: dict[str, int] = {}
        for v in verbs:
            verb_counts[v] = verb_counts.get(v, 0) + 1

        parts = []
        for verb, count in sorted(verb_counts.items(), key=lambda x: -x[1]):
            desc = verb_descriptions.get(verb, f'executes {verb}')
            parts.append(f"{desc} ({count}×)")

        summary = (
            f"Paragraph `{para.name}` contains {len(para.statements)} statement(s). "
            f"Primary operations: {', '.join(parts[:3])}."
        )
        annotations[para.name] = summary

    return annotations


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()