|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import argparse |
| 4 | +import os |
| 5 | +import re |
| 6 | + |
# Registry of every class seen while patching, keyed by class name. Each value
# is a dict of status flags; a flag stays None until some call supplies it.
found_classes = {}


def found_class(name, gcbm_declared=None, gcbm_defined=None, gcbm_initialized=None, gcbm_set=None, gcdescr_defined=None, gcdescr_set=None, malloc=None):
    """Record status flags for class *name* in ``found_classes``.

    An entry is created the first time *name* is seen. On every call, each
    flag that is passed as something other than ``None`` overwrites the stored
    value; flags left at ``None`` keep whatever was recorded earlier.

    Args:
        name: Class name used as the key in ``found_classes``.
        gcbm_declared, gcbm_defined, gcbm_initialized, gcbm_set,
        gcdescr_defined, gcdescr_set, malloc: Status flags to record, or
            ``None`` to leave the stored value untouched.
    """
    updates = {
        "gcbm_declared": gcbm_declared,
        "gcbm_defined": gcbm_defined,
        "gcbm_initialized": gcbm_initialized,
        # gcbm_set was previously only stored when the entry was created;
        # it is now updated like every other flag.
        "gcbm_set": gcbm_set,
        "gcdescr_defined": gcdescr_defined,
        "gcdescr_set": gcdescr_set,
        "malloc": malloc,
    }
    entry = found_classes.setdefault(name, dict.fromkeys(updates))
    for field, value in updates.items():
        if value is not None:
            entry[field] = value
| 32 | + |
def find_c_and_h_files(root_dir):
    """Walk *root_dir* recursively and patch every ``.c`` and ``.h`` file.

    ``.c`` files are handed to ``process_file`` together with
    ``patch_c_file``; ``.h`` files together with ``patch_h_file``. All other
    files are ignored.

    Args:
        root_dir: Directory at which the recursive walk starts.
    """
    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            full_path = os.path.join(folder, name)
            if name.endswith(".c"):
                process_file(full_path, patch_c_file)
            elif name.endswith(".h"):
                process_file(full_path, patch_h_file)
| 42 | + |
def process_file(file_path, patch_function):
    """Patch a single file in place.

    Prints a progress message, reads the file with ``read_file``, passes the
    lines through *patch_function* and writes the result back to the same
    path with ``write_file``.

    Args:
        file_path: Path of the file to rewrite.
        patch_function: Callable taking the list of lines and returning the
            list of lines to write back.
    """
    print(f"Processing {file_path}")
    original = read_file(file_path)
    write_file(file_path, patch_function(original))
| 48 | + |
def read_file(file_path):
    """Return the contents of *file_path* as a list of lines.

    The file is opened in text mode; each returned string keeps its line
    ending.
    """
    with open(file_path, 'r') as handle:
        return handle.readlines()
| 53 | + |
def write_file(file_path, lines):
    """Write *lines* to *file_path*, replacing any previous content.

    For paths ending in ``__builtin__.h`` or ``__builtin__.c`` the line
    ``#include "gc/gc_typed.h"`` is written first, unless *lines* already
    contains that include. The check keeps repeated runs of the script from
    stacking up duplicate include lines.

    Args:
        file_path: Path of the file to (over)write.
        lines: Iterable of strings, each carrying its own line ending.
    """
    gc_include = '#include "gc/gc_typed.h"\n'
    # Materialize once: the lines are scanned for the include and then written.
    lines = list(lines)
    is_builtin = file_path.endswith(("__builtin__.h", "__builtin__.c"))
    already_included = any(text.strip() == gc_include.strip() for text in lines)
    with open(file_path, 'w') as f:
        # For __builtin__.h and __builtin__.c, add include at the top.
        if is_builtin and not already_included:
            f.write(gc_include)
        f.writelines(lines)
| 60 | + |
def patch_c_file(lines):
    """Return the lines of a C source file rewritten for typed GC allocation.

    The following line-level rewrites are applied:

    * after a ``struct <X>G_class <Y>_methods;`` declaration, a
      ``GC_word <X>D_gcbm[GC_BITMAP_SIZE(struct <X>)];`` definition is added;
    * ``0,`` is inserted as the first initializer of every
      ``struct <X>G_class <Y>G_methods = {`` table;
    * an assignment to ``<Y>G_methods.$GCINFO`` becomes a ``memset`` of the
      bitmap, a ``GC_make_descriptor`` call stored in ``$GCdescr`` and an
      assignment to ``$name``;
    * ``$class->$GCINFO`` is renamed to ``$class->$name``;
    * ``self = malloc(sizeof(struct <X>));`` directly followed by
      ``self->$class = &<X>G_methods;``, and
      ``<T> $tmp = malloc(sizeof(struct <X>));``, become
      ``GC_MALLOC_EXPLICITLY_TYPED`` calls.

    Classes touched by these rewrites are recorded through ``found_class``.

    A ``self = malloc(...)`` line that is *not* directly followed by the
    matching ``self->$class`` assignment is emitted unchanged at its original
    position, so no source line is ever dropped or reordered.

    Args:
        lines: List of source lines, each carrying its own line ending.

    Returns:
        A new list with the patched lines.

    Raises:
        ValueError: If the struct named in ``self = malloc(...)`` differs from
            the class assigned to ``self->$class`` on the following line.
    """
    res = []
    # Match object of a held-back ``self = malloc(...)`` line. It is resolved
    # on the very next iteration, or after the loop if it was the last line.
    malloc_line = None
    for line in lines:
        if malloc_line is not None:
            pending, malloc_line = malloc_line, None
            m = re.match(r"^( +)self->\$class = &([^;]+G_methods);", line)
            if m:
                class_name = m.group(2).replace("G_methods", "")
                if pending.group(2) != class_name:
                    raise ValueError(f"ERROR: malloc and class name mismatch: {pending.group(2)} != {class_name}")

                res.append(f"{pending.group(1)}self = GC_MALLOC_EXPLICITLY_TYPED(sizeof(struct {pending.group(2)}), {m.group(2)}.$GCdescr);\n")
                res.append(line)
                found_class(pending.group(2).replace("G_methods", ""), malloc=True)
                continue
            # No class assignment follows: keep the original malloc line as
            # it was, then handle the current line with the regular rules.
            res.append(pending.string)

        m = re.match(r"^struct (([^ ]+)G_class) ([^ ]+)_methods;", line)
        if m:
            found_class(m.group(2), gcbm_defined=True)
            res.append(line)
            res.append(f"GC_word {m.group(2)}D_gcbm[GC_BITMAP_SIZE(struct {m.group(2)})];\n")
            continue

        # Find struct definitions of method tables, which match a struct
        # definition name ending with _class and a variable name ending with
        # _methods. Insert 0 as the first element when initializing the struct.
        m = re.match(r"^struct ([^ ]+)G_class ([^ ]+)G_methods = {$", line)
        if m:
            found_class(m.group(2), gcbm_initialized=True)
            res.append(f"struct {m.group(1)}G_class {m.group(2)}G_methods = {{\n 0,\n")
            continue

        m = re.match(r"^struct ([^ ]+)G_class ([^ ]+)G_methods = {(.*)", line)
        if m:
            found_class(m.group(2), gcbm_initialized=True)
            # ``.*`` stops before the line terminator, so re-attach whatever
            # follows the match to keep the next line from being glued on.
            res.append(f"struct {m.group(1)}G_class {m.group(2)}G_methods = {{0," + m.group(3) + line[m.end():])
            continue

        # Replace $GCINFO with $name
        m = re.match(r"^( +)([^ ]+)\.\$GCINFO =(.*)", line)
        if m:
            oname = re.sub(r"G_methods", "", m.group(2))
            gcbm = re.sub(r"G_methods", "D_gcbm", m.group(2))
            res.append(f"{m.group(1)}memset({gcbm}, 0xFF, sizeof({gcbm}));\n")
            found_class(m.group(2).replace("G_methods", ""), gcbm_initialized=True)
            res.append(f"{m.group(1)}{m.group(2)}.$GCdescr = GC_make_descriptor({gcbm}, GC_WORD_LEN(struct {oname}));\n")
            found_class(m.group(2).replace("G_methods", ""), gcdescr_set=True)
            res.append(f"{m.group(1)}{m.group(2)}.$name ={m.group(3)}\n")
            continue

        m = re.search(r"\$class->\$GCINFO", line)
        if m:
            res.append(re.sub(r"\$class->\$GCINFO", "$class->$name", line))
            continue

        # Replace malloc with GC_MALLOC_EXPLICITLY_TYPED
        m = re.match(r"^( +)self = malloc\(sizeof\(struct ([^)]+)\)\);", line)
        if m:
            # Hold the line back; the next iteration decides whether it is
            # rewritten (class assignment follows) or emitted unchanged.
            malloc_line = m
            continue

        # In line like this:
        #   B_TimesD_str $tmp = malloc(sizeof(struct B_TimesD_str));
        # replace malloc with GC_MALLOC_EXPLICITLY_TYPED
        m = re.match(r"^( +)([^ ]+) \$tmp = malloc\(sizeof\(struct ([^)]+)\)\);", line)
        if m:
            res.append(f"{m.group(1)}{m.group(2)} $tmp = GC_MALLOC_EXPLICITLY_TYPED(sizeof(struct {m.group(3)}), {m.group(3)}G_methods.$GCdescr);\n")
            found_class(m.group(3), malloc=True)
            continue

        res.append(line)

    # A malloc on the last line has no follower; emit it unchanged.
    if malloc_line is not None:
        res.append(malloc_line.string)
    return res
| 134 | + |
def patch_h_file(lines):
    """Return the lines of a C header rewritten for typed GC allocation.

    Inside every ``struct <name> {`` body (except ``$R``, ``$ROW`` and
    ``$ROWLISTHEADER``) a ``char ... GCINFO;`` field is replaced by a
    ``GC_descr $GCdescr;`` field followed by a ``char *$name;`` field. For
    structs whose name does not end in ``_class``, the struct is recorded via
    ``found_class`` and an ``extern GC_word <name>D_gcbm[...]`` declaration is
    appended right after the next line that starts with ``};``.

    Args:
        lines: List of header lines, each carrying its own line ending.

    Returns:
        A new list with the patched lines.
    """
    skipped_structs = ("$R", "$ROW", "$ROWLISTHEADER")
    out = []
    inside_struct = False
    pending_extern = None
    for text in lines:
        header = re.match(r"^struct ([^ ]+) {", text)
        if header and header.group(1) not in skipped_structs:
            struct_name = header.group(1)
            inside_struct = True
            if not struct_name.endswith('_class'):
                found_class(struct_name, gcbm_declared=True)
                found_class(struct_name, gcdescr_defined=True)
                pending_extern = f"extern GC_word {struct_name}D_gcbm[GC_BITMAP_SIZE(struct {struct_name})];\n"

        if inside_struct:
            if text.strip() == "};":
                inside_struct = False
            # Replace $GCINFO with $GCdescr
            field = re.match(r"^( +)char.*GCINFO;", text)
            if field:
                pad = field.group(1)
                out.extend((f"{pad}GC_descr $GCdescr;\n", f"{pad}char *$name;\n"))
                continue

        out.append(text)

        # The extern declaration goes directly after the struct's closing line.
        if pending_extern and re.match(r"^};", text):
            out.append(pending_extern)
            pending_extern = None

    return out
| 166 | + |
| 167 | + |
def format_class_status(name, status):
    """Return one report row for class *name*.

    The row is the name left-aligned in 40 columns, one space, then one
    character per status field: ``X`` when the field is truthy, a blank
    otherwise. The fields appear in this order:

    - gcbm_declared: _gcbm declared in .h file
    - gcbm_defined: _gcbm defined in .c file
    - gcbm_initialized: _gcbm initialized in .c file to 0
    - gcbm_set: _gcbm set to proper value
    - gcdescr_defined: gcdescr defined in .h file
    - gcdescr_set: gcdescr set to proper value
    - malloc: malloc is used in a struct definition

    Args:
        name: Class name shown in the first column.
        status: Mapping holding the seven status fields listed above.
    """
    fields = (
        "gcbm_declared",
        "gcbm_defined",
        "gcbm_initialized",
        "gcbm_set",
        "gcdescr_defined",
        "gcdescr_set",
        "malloc",
    )
    marks = "".join("X" if status[field] else " " for field in fields)
    return f"{name:40} {marks}"


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Find and patch .c and .h files.")
    parser.add_argument(
        "-d", "--directory", type=str, default=".", help="The directory to scan (default: current directory)"
    )
    args = parser.parse_args()

    # Patch the three source trees below the chosen root. normpath keeps the
    # default root "." producing the plain "builtin", "rts", "stdlib" paths.
    for subdir in ("builtin", "rts", "stdlib"):
        find_c_and_h_files(os.path.normpath(os.path.join(args.directory, subdir)))

    # Show found classes and their status as recorded in the found_classes
    # dict, one row per class in sorted order.
    for c in sorted(found_classes):
        print(format_class_status(c, found_classes[c]))
0 commit comments