Skip to content

Commit 78ba866

Browse files
committed
Add GC code utils
1 parent d191c98 commit 78ba866

2 files changed

Lines changed: 351 additions & 0 deletions

File tree

utils/gc/check.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import os
5+
import re
6+
7+
# Registry of every class seen so far, keyed by class name. Each value is a
# dict of status flags (None until something sets them) plus two counters.
found_classes = {}


def found_class(name,
                gcbm_declared=None,
                gcbm_defined=None,
                gcbm_zeroinit=None,
                gcbm_safeinit=None,
                gcbm_set=None,
                gcdescr_declared=None,
                gcdescr_zeroinit=None,
                gcdescr_set=None,
                malloc=0,
                tmalloc=0):
    """Record what has been observed about class *name* in ``found_classes``.

    The entry for *name* is created on first use. Every flag argument that
    is not ``None`` overwrites the stored flag; ``None`` leaves the stored
    value untouched. ``malloc`` and ``tmalloc`` are added to the running
    counters of the entry.
    """
    entry = found_classes.setdefault(name, {
        "gcbm_declared": None,
        "gcbm_defined": None,
        "gcbm_zeroinit": None,
        "gcbm_safeinit": None,
        "gcbm_set": None,
        "gcdescr_declared": None,
        "gcdescr_zeroinit": None,
        "gcdescr_set": None,
        "malloc": 0,
        "tmalloc": 0,
    })

    flags = {
        "gcbm_declared": gcbm_declared,
        "gcbm_defined": gcbm_defined,
        "gcbm_zeroinit": gcbm_zeroinit,
        "gcbm_safeinit": gcbm_safeinit,
        # gcbm_set used to be stored only when the entry was first created,
        # so it was silently dropped for classes that were already known.
        "gcbm_set": gcbm_set,
        "gcdescr_declared": gcdescr_declared,
        "gcdescr_zeroinit": gcdescr_zeroinit,
        "gcdescr_set": gcdescr_set,
    }
    for key, value in flags.items():
        if value is not None:
            entry[key] = value

    entry["malloc"] += malloc
    entry["tmalloc"] += tmalloc
51+
52+
53+
def find_c_and_h_files(root_dir):
    """Walk *root_dir* recursively and check every ``.c`` and ``.h`` file.

    ``.c`` files are handed to ``check_c_file`` and ``.h`` files to
    ``check_h_file``, both through ``process_file``. Other files are ignored.
    """
    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            if not (name.endswith(".c") or name.endswith(".h")):
                continue
            path = os.path.join(folder, name)
            # Past the guard above exactly one of the two suffixes applies.
            if name.endswith(".c"):
                process_file(path, check_c_file)
            else:
                process_file(path, check_h_file)
62+
63+
def process_file(file_path, check_function):
    """Announce *file_path* on stdout, load it and run *check_function* on its lines."""
    print(f"Processing {file_path}")
    check_function(read_file(file_path))
67+
68+
def read_file(file_path):
    """Return the content of *file_path* as a list of lines, line endings kept."""
    with open(file_path, 'r') as handle:
        return list(handle)
72+
73+
def check_c_file(lines):
    """Scan the lines of a ``.c`` file and record GC-related facts per class.

    Every line is tried against each rule below, in order; each rule that
    matches reports the captured class name to ``found_class`` together
    with the fact that rule stands for. A single line may trigger several
    rules.
    """
    # (matcher, capture group holding the class name, facts to record)
    rules = (
        # Definition of the GC bitmap.
        (re.compile(r"^GC_word ([^ ]+)D_gcbm\[").match,
         1, {"gcbm_defined": True}),
        # memset over the whole GC bitmap.
        (re.compile(r"memset\(&([^ ]+)D_gcbm, 0, sizeof\([^ ]+D_gcbm\)\);").search,
         1, {"gcbm_safeinit": True}),
        # Method table definition: struct type ends with G_class, variable
        # name ends with G_methods. The variable prefix is the class name.
        (re.compile(r"^struct\s+([^ ]+)G_class ([^ ]+)G_methods = {").match,
         2, {"gcdescr_zeroinit": True}),
        # $GCdescr in the method table assigned from GC_make_descriptor.
        (re.compile(r" +([^ ]+)G_methods.\$GCdescr = GC_make_descriptor").match,
         1, {"gcdescr_set": True}),
        # malloc of a struct.
        (re.compile(r"malloc\(sizeof\(struct ([^)]+)\)").search,
         1, {"malloc": 1}),
        # GC_MALLOC_EXPLICITLY_TYPED of a struct using a method table's $GCdescr.
        (re.compile(r"GC_MALLOC_EXPLICITLY_TYPED\(sizeof\(struct ([^ ]+)\), ([^ ]+)G_methods.\$GCdescr\)").search,
         1, {"tmalloc": 1}),
    )
    for line in lines:
        for matcher, group, facts in rules:
            hit = matcher(line)
            if hit:
                found_class(hit.group(group), **facts)
105+
106+
107+
def check_h_file(lines):
    """Scan the lines of a ``.h`` file and record GC-related declarations.

    Two facts are reported through ``found_class``: a ``GC_descr $GCdescr;``
    member inside a ``struct <name>G_class { ... };`` body (recorded for
    ``<name>``), and an ``extern GC_word <name>D_gcbm`` declaration.
    """
    struct_open = re.compile(r"^struct ([^ ]+)G_class {")
    descr_member = re.compile(r"^ +GC_descr \$GCdescr;")
    bitmap_extern = re.compile(r"extern GC_word ([^ ]+)D_gcbm")

    current = None  # class whose G_class struct body is being read, if any
    for line in lines:
        opened = struct_open.match(line)
        if opened:
            current = opened.group(1)

        if current is not None:
            if line.strip() == "};":
                current = None
            elif descr_member.match(line):
                found_class(current, gcdescr_declared=True)

        declared = bitmap_extern.match(line)
        if declared:
            found_class(declared.group(1), gcbm_declared=True)
123+
124+
125+
def _flag(value, mark):
    """Return *mark* when *value* is truthy, otherwise a single space."""
    return mark if value else " "


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Find and check .c and .h files.")
    parser.add_argument(
        "-d", "--directory", type=str, default=".", help="The directory to scan (default: current directory)"
    )
    args = parser.parse_args()

    # The three source trees are looked up below the requested directory.
    # The option used to be parsed and then ignored. normpath keeps the
    # default case scanning and printing plain "builtin/..." style paths.
    for subdir in ("builtin", "rts", "stdlib"):
        find_c_and_h_files(os.path.normpath(os.path.join(args.directory, subdir)))

    # Print one row per class found. Each flag column shows its mark when the
    # flag is truthy and a space otherwise:
    #   BM:      d = gcbm_declared   (extern ..D_gcbm seen in a .h file)
    #            D = gcbm_defined    (GC_word ..D_gcbm[ seen in a .c file)
    #            0 = gcbm_zeroinit
    #            1 = gcbm_safeinit   (memset over ..D_gcbm seen in a .c file)
    #            + = gcbm_set
    #   GCdescr: d = gcdescr_declared ($GCdescr member seen in a .h file)
    #            D = gcdescr_declared again, see the note below
    #            0 = gcdescr_zeroinit (method table definition seen in a .c file)
    #            + = gcdescr_set      (GC_make_descriptor assignment seen)
    #   malloc:  count of plain malloc / count of GC_MALLOC_EXPLICITLY_TYPED
    # Every class is printed; nothing is filtered out.
    for c in sorted(found_classes):
        cl = found_classes[c]
        print(f"{c:40} BM:" +
              _flag(cl['gcbm_declared'], 'd') +
              _flag(cl['gcbm_defined'], 'D') +
              _flag(cl['gcbm_zeroinit'], '0') +
              _flag(cl['gcbm_safeinit'], '1') +
              _flag(cl['gcbm_set'], '+') +
              " GCdescr:" +
              _flag(cl['gcdescr_declared'], 'd') +
              # NOTE(review): this column repeats gcdescr_declared; there is
              # no separate "defined" flag for GCdescr. Kept so the output
              # layout stays unchanged - confirm what 'D' should show.
              _flag(cl['gcdescr_declared'], 'D') +
              _flag(cl['gcdescr_zeroinit'], '0') +
              _flag(cl['gcdescr_set'], '+') +
              f" malloc: {cl['malloc']} / {cl['tmalloc']}")

utils/gc/patchy.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import os
5+
import re
6+
7+
# Registry of every class touched so far, keyed by class name. Each value is
# a dict of status flags that stay None until something sets them.
found_classes = {}


def found_class(name, gcbm_declared=None, gcbm_defined=None, gcbm_initialized=None, gcbm_set=None, gcdescr_defined=None, gcdescr_set=None, malloc=None):
    """Record what has been done for class *name* in ``found_classes``.

    The entry for *name* is created on first use. Every argument that is
    not ``None`` overwrites the stored flag of the same name; ``None``
    leaves the stored value untouched.
    """
    entry = found_classes.setdefault(name, {
        "gcbm_declared": None,
        "gcbm_defined": None,
        "gcbm_initialized": None,
        "gcbm_set": None,
        "gcdescr_defined": None,
        "gcdescr_set": None,
        "malloc": None,
    })
    flags = {
        "gcbm_declared": gcbm_declared,
        "gcbm_defined": gcbm_defined,
        "gcbm_initialized": gcbm_initialized,
        # gcbm_set used to be stored only when the entry was first created,
        # so it was silently dropped for classes that were already known.
        "gcbm_set": gcbm_set,
        "gcdescr_defined": gcdescr_defined,
        "gcdescr_set": gcdescr_set,
        "malloc": malloc,
    }
    for key, value in flags.items():
        if value is not None:
            entry[key] = value
32+
33+
def find_c_and_h_files(root_dir):
    """Walk *root_dir* recursively and patch every ``.c`` and ``.h`` file.

    ``.c`` files are handed to ``patch_c_file`` and ``.h`` files to
    ``patch_h_file``, both through ``process_file``. Other files are ignored.
    """
    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            if not (name.endswith(".c") or name.endswith(".h")):
                continue
            path = os.path.join(folder, name)
            # Past the guard above exactly one of the two suffixes applies.
            if name.endswith(".c"):
                process_file(path, patch_c_file)
            else:
                process_file(path, patch_h_file)
42+
43+
def process_file(file_path, patch_function):
    """Announce *file_path* on stdout and rewrite it with *patch_function*'s output.

    The file is read, its lines are passed to *patch_function*, and the
    returned lines are written back to the same path.
    """
    print(f"Processing {file_path}")
    original = read_file(file_path)
    write_file(file_path, patch_function(original))
48+
49+
def read_file(file_path):
    """Return the content of *file_path* as a list of lines, line endings kept."""
    with open(file_path, 'r') as handle:
        return list(handle)
53+
54+
def write_file(file_path, lines):
    """Overwrite *file_path* with *lines*.

    For paths ending in ``__builtin__.h`` or ``__builtin__.c`` the line
    ``#include "gc/gc_typed.h"`` is written first, unless *lines* already
    contains exactly that line. Without this check every run of the tool
    over an already patched file would stack one more copy of the include.
    """
    include_line = '#include "gc/gc_typed.h"\n'
    lines = list(lines)
    needs_include = (
        file_path.endswith(("__builtin__.h", "__builtin__.c"))
        and include_line not in lines
    )
    with open(file_path, 'w') as f:
        if needs_include:
            f.write(include_line)
        f.writelines(lines)
60+
61+
def patch_c_file(lines):
    """Return the lines of a ``.c`` file rewritten for typed GC allocation.

    Each input line is tested against the rewrites below, in this order;
    the first one that applies wins and all other lines pass through
    unchanged:

    * ``struct <X>G_class <Y>_methods;`` - kept, followed by a definition
      of the ``<X>D_gcbm`` bitmap.
    * ``struct <X>G_class <Y>G_methods = {`` - a leading ``0`` is inserted
      as the first initializer, both when the brace ends the line and when
      initializers follow on the same line.
    * ``<table>.$GCINFO = ...`` - replaced by a memset of the bitmap, a
      ``$GCdescr = GC_make_descriptor(...)`` assignment and a ``$name``
      assignment carrying the original right-hand side.
    * any line containing ``$class->$GCINFO`` - renamed to ``$class->$name``.
    * ``self = malloc(sizeof(struct <X>));`` - turned into
      ``GC_MALLOC_EXPLICITLY_TYPED`` once a later
      ``self->$class = &<X>G_methods;`` line supplies the descriptor.
    * ``<T> $tmp = malloc(sizeof(struct <X>));`` - turned into
      ``GC_MALLOC_EXPLICITLY_TYPED`` using ``<X>G_methods.$GCdescr``.

    Classes touched are reported through ``found_class``.

    Raises:
        ValueError: if a ``self->$class`` assignment names a different class
            than the pending ``self = malloc`` line.
    """
    res = []
    # (index into res, match object) of the latest `self = malloc(...)` line
    # still waiting for its `self->$class = &...G_methods;` partner. The
    # original line is kept in res at that index and replaced in place when
    # the partner shows up. It was previously held back instead, which lost
    # the allocation if no partner followed and moved it past any lines in
    # between if one did.
    pending = None
    for line in lines:
        m = re.match(r"^struct (([^ ]+)G_class) ([^ ]+)_methods;", line)
        if m:
            found_class(m.group(2), gcbm_defined=True)
            res.append(line)
            res.append(f"GC_word {m.group(2)}D_gcbm[GC_BITMAP_SIZE(struct {m.group(2)})];\n")
            continue

        # Find struct definitions of method tables, which match a struct
        # definition name ending with _class and a variable name ending with
        # _methods. Insert 0 as the first element when initializing the struct.
        m = re.match(r"^struct ([^ ]+)G_class ([^ ]+)G_methods = {$", line)
        if m:
            found_class(m.group(2), gcbm_initialized=True)
            res.append(f"struct {m.group(1)}G_class {m.group(2)}G_methods = {{\n 0,\n")
            continue

        m = re.match(r"^struct ([^ ]+)G_class ([^ ]+)G_methods = {(.*)", line)
        if m:
            found_class(m.group(2), gcbm_initialized=True)
            # (.*) stops before the line terminator, so re-attach whatever
            # follows the match; otherwise the next line would be glued on.
            res.append(f"struct {m.group(1)}G_class {m.group(2)}G_methods = {{0," + m.group(3) + line[m.end():])
            continue

        # Replace $GCINFO with $name
        m = re.match(r"^( +)([^ ]+)\.\$GCINFO =(.*)", line)
        if m:
            indent, table, rhs = m.group(1), m.group(2), m.group(3)
            oname = re.sub(r"G_methods", "", table)
            gcbm = re.sub(r"G_methods", "D_gcbm", table)
            res.append(f"{indent}memset({gcbm}, 0xFF, sizeof({gcbm}));\n")
            found_class(oname, gcbm_initialized=True)
            res.append(f"{indent}{table}.$GCdescr = GC_make_descriptor({gcbm}, GC_WORD_LEN(struct {oname}));\n")
            found_class(oname, gcdescr_set=True)
            res.append(f"{indent}{table}.$name ={rhs}\n")
            continue

        if re.search(r"\$class->\$GCINFO", line):
            res.append(re.sub(r"\$class->\$GCINFO", "$class->$name", line))
            continue

        # Replace malloc with GC_MALLOC_EXPLICITLY_TYPED. The descriptor is
        # only known once the matching self->$class line is seen, so keep
        # the line as is for now and remember where it sits.
        m = re.match(r"^( +)self = malloc\(sizeof\(struct ([^)]+)\)\);", line)
        if m:
            pending = (len(res), m)
            res.append(line)
            continue

        m = re.match(r"^( +)self->\$class = &([^;]+G_methods);", line)
        if pending is not None and m:
            index, malloc_m = pending
            class_name = m.group(2).replace("G_methods", "")
            if malloc_m.group(2) != class_name:
                raise ValueError(f"ERROR: malloc and class name mismatch: {malloc_m.group(2)} != {class_name}")

            res[index] = f"{malloc_m.group(1)}self = GC_MALLOC_EXPLICITLY_TYPED(sizeof(struct {malloc_m.group(2)}), {m.group(2)}.$GCdescr);\n"
            res.append(line)
            found_class(malloc_m.group(2).replace("G_methods", ""), malloc=True)
            pending = None
            continue

        # In line like this:
        #   B_TimesD_str $tmp = malloc(sizeof(struct B_TimesD_str));
        # replace malloc with GC_MALLOC_EXPLICITLY_TYPED
        m = re.match(r"^( +)([^ ]+) \$tmp = malloc\(sizeof\(struct ([^)]+)\)\);", line)
        if m:
            res.append(f"{m.group(1)}{m.group(2)} $tmp = GC_MALLOC_EXPLICITLY_TYPED(sizeof(struct {m.group(3)}), {m.group(3)}G_methods.$GCdescr);\n")
            found_class(m.group(3), malloc=True)
            continue

        res.append(line)
    return res
134+
135+
def patch_h_file(lines):
    """Return the lines of a ``.h`` file rewritten for typed GC allocation.

    Inside every ``struct <name> {`` body, except for the structs named
    ``$R``, ``$ROW`` and ``$ROWLISTHEADER``, a ``char ... GCINFO;`` member
    is replaced by a ``GC_descr $GCdescr;`` member followed by a
    ``char *$name;`` member. For structs whose name does not end in
    ``_class`` an ``extern GC_word <name>D_gcbm[...]`` declaration is added
    after the next line that starts with ``};``, and the class is reported
    through ``found_class``.
    """
    skipped = ("$R", "$ROW", "$ROWLISTHEADER")
    out = []
    inside = False          # currently within a struct body that gets patched
    pending_extern = None   # declaration to emit after the closing "};"
    for line in lines:
        opener = re.match(r"^struct ([^ ]+) {", line)
        if opener and opener.group(1) not in skipped:
            inside = True
            struct_name = opener.group(1)
            if not struct_name.endswith('_class'):
                found_class(struct_name, gcbm_declared=True)
                found_class(struct_name, gcdescr_defined=True)
                pending_extern = f"extern GC_word {struct_name}D_gcbm[GC_BITMAP_SIZE(struct {struct_name})];\n"

        if inside:
            if line.strip() == "};":
                inside = False
            # Replace $GCINFO with $GCdescr
            member = re.match(r"^( +)char.*GCINFO;", line)
            if member:
                pad = member.group(1)
                out.extend((f"{pad}GC_descr $GCdescr;\n", f"{pad}char *$name;\n"))
                continue

        out.append(line)

        if pending_extern and re.match(r"^};", line):
            out.append(pending_extern)
            pending_extern = None

    return out
166+
167+
168+
def _flag(value):
    """Return ``'X'`` when *value* is truthy, otherwise a single space."""
    return "X" if value else " "


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Find and patch .c and .h files.")
    parser.add_argument(
        "-d", "--directory", type=str, default=".", help="The directory to scan (default: current directory)"
    )
    args = parser.parse_args()

    # The three source trees are looked up below the requested directory.
    # The option used to be parsed and then ignored, so the files rewritten
    # in place were always the ones below the current working directory.
    # normpath keeps the default case using plain "builtin/..." style paths.
    for subdir in ("builtin", "rts", "stdlib"):
        find_c_and_h_files(os.path.normpath(os.path.join(args.directory, subdir)))

    # Print one row per class that was touched. Each column holds an X when
    # the flag is truthy and a space otherwise. Columns, left to right:
    # - gcbm_declared: extern ..D_gcbm declaration added to a .h file
    # - gcbm_defined: ..D_gcbm definition added to a .c file
    # - gcbm_initialized: method table initializer or bitmap memset emitted
    # - gcbm_set: not set by any patch step in this script
    # - gcdescr_defined: struct seen in a .h file
    # - gcdescr_set: $GCdescr assignment emitted in a .c file
    # - malloc: a malloc was replaced with GC_MALLOC_EXPLICITLY_TYPED
    columns = ("gcbm_declared", "gcbm_defined", "gcbm_initialized", "gcbm_set",
               "gcdescr_defined", "gcdescr_set", "malloc")
    for c in sorted(found_classes):
        cl = found_classes[c]
        print(f"{c:40} " + "".join(_flag(cl[key]) for key in columns))

0 commit comments

Comments
 (0)