Skip to content

Commit 1686058

Browse files
committed
translation
1 parent 509a4c4 commit 1686058

2 files changed

Lines changed: 89 additions & 77 deletions

File tree

fuzz/ast_utils.py

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,26 @@
55
import os
66
import re
77

8+
89
class TestFunctionTransformer(ast.NodeTransformer):
910
"""AST transformer for test function conversion"""
1011

1112
def visit_FunctionDef(self, node):
12-
# 首先处理 main 函数(移除)
13+
# First, process main function (remove it)
1314
if node.name == "main":
1415
return None
1516

16-
# 处理 TestInput/TestOneInput 函数
17+
# Process TestInput/TestOneInput functions
1718
if node.name in ["TestInput", "TestOneInput"]:
18-
# a. 记录参数名称(假设只有一个参数)
19+
# a. Record parameter name (assume only one parameter)
1920
param_name = None
2021
if node.args.args:
2122
param_name = node.args.args[0].arg
2223

23-
# b. 将函数名改为 test_
24+
# b. Rename function to test_
2425
node.name = "test_"
2526

26-
# c. 移除参数(将参数列表设为空)
27+
# c. Remove parameters (set argument list to empty)
2728
node.args = ast.arguments(
2829
posonlyargs=[],
2930
args=[],
@@ -34,48 +35,47 @@ def visit_FunctionDef(self, node):
3435
defaults=[],
3536
)
3637

37-
# d. 在函数体开头插入 原参数名 = b""
38+
# d. Insert param_name = b"" at the beginning of the function body
3839
if param_name:
3940
self.add_param_assignment(node, param_name)
4041

41-
# 确保继续遍历子节点
42+
# Continue the traversal into child nodes
4243
self.generic_visit(node)
4344
return node
4445

4546
def add_param_assignment(self, node, param_name):
4647
"""Add param_name = b"..." at the beginning of the function body with an inline comment"""
47-
# 创建包含赋值和注释的复合值
48+
# Build a compound (f-string) value holding the placeholder bytes and a marker comment string
4849
value_with_comment = ast.JoinedStr(
4950
values=[
5051
ast.FormattedValue(value=ast.Constant(value=b""), conversion=-1),
51-
ast.Constant(value=" # This is a test template")
52+
ast.Constant(value=" # This is a test template"),
5253
]
5354
)
54-
55-
# 创建赋值节点
55+
56+
# Create an assignment node
5657
assign_node = ast.Assign(
57-
targets=[ast.Name(id=param_name, ctx=ast.Store())],
58-
value=value_with_comment
58+
targets=[ast.Name(id=param_name, ctx=ast.Store())], value=value_with_comment
5959
)
60-
61-
# 如果有文档字符串,插入在文档字符串之后
60+
61+
# If there is a docstring, insert after the docstring
6262
if (
6363
node.body
6464
and isinstance(node.body[0], ast.Expr)
6565
and isinstance(node.body[0].value, ast.Constant)
6666
and isinstance(node.body[0].value.value, str)
6767
):
68-
# 插入在文档字符串后面
68+
# Insert right after the docstring
6969
node.body.insert(1, assign_node)
7070
else:
71-
# 插入在函数开头
71+
# Insert at the beginning of the function
7272
node.body.insert(0, assign_node)
7373

7474
def remove_print_param(self, node, param_name):
7575
"""Remove print statements for the specific parameter"""
7676
new_body = []
7777
for stmt in node.body:
78-
# 跳过 print(param_name) 调用
78+
# Skip print(param_name) calls
7979
if (
8080
isinstance(stmt, ast.Expr)
8181
and isinstance(stmt.value, ast.Call)
@@ -92,7 +92,7 @@ def remove_print_param(self, node, param_name):
9292

9393
def visit_If(self, node):
9494
"""Remove if __name__ == '__main__' blocks"""
95-
# 检查是否是主函数保护
95+
# Check if this is the main function guard
9696
if (
9797
isinstance(node.test, ast.Compare)
9898
and isinstance(node.test.left, ast.Name)
@@ -102,10 +102,10 @@ def visit_If(self, node):
102102
and node.test.comparators[0].value == "__main__"
103103
):
104104

105-
# 移除整个 if
105+
# Remove the entire if block
106106
return None
107107

108-
# 确保继续遍历子节点
108+
# Continue the traversal into child nodes
109109
self.generic_visit(node)
110110
return node
111111

@@ -119,22 +119,22 @@ def __init__(self, idx, fuzz_input):
119119
def visit_FunctionDef(self, node):
120120
if node.name == "test_":
121121
self.found_test_function = True
122-
123-
# 1. 修改函数名
122+
123+
# 1. Modify function name
124124
node.name = f"test_{self.idx}"
125-
126-
# 2. 查找并替换包含特定注释的赋值语句
125+
126+
# 2. Find and replace assignment statements with the special comment
127127
for i, stmt in enumerate(node.body):
128-
# 检查是否是赋值语句
128+
# Check if it's an assignment statement
129129
if isinstance(stmt, ast.Assign):
130-
# 检查赋值语句的值是否是带有注释的复合值
130+
# Check if the value is a compound value with a comment
131131
if (
132132
isinstance(stmt.value, ast.JoinedStr)
133133
and len(stmt.value.values) >= 2
134134
and isinstance(stmt.value.values[1], ast.Constant)
135135
and stmt.value.values[1].value == " # This is a test template"
136136
):
137-
# 替换为新的输入值
137+
# Replace with new fuzz input
138138
stmt.value = ast.Constant(value=self.fuzz_input)
139139
break
140140
return node
@@ -145,9 +145,9 @@ def generate_test_template(target_name: str, repo_path: str):
145145
Generate Python test template using AST for more precise code transformations
146146
"""
147147
src_file = os.path.join(repo_path, target_name)
148-
logging.info(f"Generating test template for {src_file}")
148+
logging.info(f"Generating test template for {src_file}")
149149
if not src_file.endswith(".py"):
150-
src_file += ".py"
150+
src_file += ".py"
151151
if not os.path.exists(src_file):
152152
logging.error(f"Source target file not found: {src_file}")
153153
return None
@@ -191,11 +191,11 @@ def generate_test_template(target_name: str, repo_path: str):
191191
with open(init_path, "w", encoding="utf-8") as f:
192192
f.write("")
193193

194-
# 使用目标名称的基础部分(移除扩展名)作为输出文件名
194+
# Use the base part of target_name (remove extension) as the output file name
195195
base_target_name = os.path.splitext(target_name)[0]
196196
template_path = os.path.join(template_dir, f"{base_target_name}.py")
197197
with open(template_path, "w", encoding="utf-8") as f:
198198
f.write(shebang + cleaned_code.strip() + "\n")
199199

200200
logging.info(f"Generated cleaned template: {template_path}")
201-
return template_path
201+
return template_path

fuzz/collect_fuzz_python.py

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@
1919
from difflib import SequenceMatcher
2020
from itertools import islice
2121
from datetime import datetime
22+
import re
23+
24+
# Import AST-related functionality
25+
from ast_utils import (
26+
TestFunctionTransformer,
27+
TestGenTransformer,
28+
generate_test_template,
29+
)
2230

23-
# 导入AST相关的功能
24-
from ast_utils import TestFunctionTransformer, TestGenTransformer, generate_test_template
2531

2632
def build_image(repos: list[str], jobs: int):
2733
"""
@@ -200,22 +206,23 @@ def _transform_repo(repo: str):
200206
project_name = os.path.basename(repo)
201207
oss_fuzz_dir = Path(repo).parent.parent
202208
raw_targets = discover_targets(project_name, oss_fuzz_dir)
203-
204-
# 只需移除目标名称中的 "_print1",不要添加任何新后缀
209+
210+
# Only strip "_print1" from target names; do not add any new suffix
205211
transformed_targets = [t.replace("_print1", "") for t in raw_targets]
206-
207-
# 去重
212+
213+
# Remove duplicates
208214
targets = list(set(transformed_targets))
209-
210-
# 传递给 generate_test_template 的是简单目标名称
215+
216+
# Pass simple target names to generate_test_template
211217
return [generate_test_template(t, repo) for t in targets]
212218

213219
with ProcessingPool(jobs) as p:
214220
return list(p.map(_transform_repo, repos))
215221

222+
216223
def substitute_one_repo(
217224
repo: str,
218-
targets: list[tuple], # 每个元素是 (transformed_target, raw_target)
225+
targets: list[tuple], # Each element is (transformed_target, raw_target)
219226
n_fuzz: int,
220227
strategy: str,
221228
max_len: int,
@@ -225,57 +232,58 @@ def substitute_one_repo(
225232
Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs
226233
using AST transformations
227234
"""
235+
228236
input_dir = pjoin(repo, "fuzz_inputs")
229237
template_dir = pjoin(repo, "tests-gen")
230238
os.makedirs(template_dir, exist_ok=True)
231239

232240
for transformed_target, raw_target in targets:
233-
# 使用转换后的目标名称构建模板文件路径
241+
# Build template file path using transformed target name
234242
source_file = pjoin(template_dir, transformed_target + ".py")
235-
236-
# 使用原始目标名称构建输入文件路径
243+
244+
# Build input file path using raw target name
237245
input_path = pjoin(input_dir, raw_target)
238-
239-
# 确保源文件存在
246+
247+
# Ensure source file exists
240248
if not os.path.exists(source_file):
241249
logging.warning(f"Source file not found: {source_file}")
242250
continue
243251
if not os.path.exists(input_path):
244252
logging.warning(f"Input file not found: {input_path}")
245253
continue
246-
247-
# 读取所有有效的输入数据
254+
255+
# Read all valid input data
248256
valid_inputs = []
249257
with open(input_path, "rb") as f_input:
250258
lines = f_input.readlines()
251-
# 文件已关闭,现在处理数据
259+
# File is closed, now process data
252260
for line in lines:
253-
# 使用 errors='replace' 确保解码不会失败
261+
# Use errors='replace' to ensure decoding doesn't fail
254262
decoded = line.decode("utf-8", errors="replace")
255-
256-
# 只处理以 b' b" 开头的行
263+
264+
# Only process lines starting with b' or b"
257265
if decoded.startswith(("b'", 'b"')):
258266
if decoded.startswith("b'") and decoded.endswith("'\n"):
259267
byte_data = line[2:-2]
260268
elif decoded.startswith('b"') and decoded.endswith('"\n'):
261269
byte_data = line[2:-2]
262270
else:
263271
continue
264-
272+
265273
if 0 < len(byte_data) <= max_len:
266274
valid_inputs.append(byte_data)
267-
# 对于其他行,如果长度在范围内且不是以 b' b" 开头,也考虑加入
275+
# Lines that do not start with b' or b" are also accepted as-is when their length is within range
268276
elif 0 < len(line) <= max_len:
269277
valid_inputs.append(line)
270278

271279
if not valid_inputs:
272-
# 使用 transformed_target 而不是 target_name
280+
# Use transformed_target instead of target_name
273281
logging.warning(f"No valid inputs found for {transformed_target}")
274282
continue
275283

276-
# 使用 transformed_target 而不是 target_name
284+
# Use transformed_target instead of target_name
277285
logging.info(f"Loaded {len(valid_inputs)} inputs for {transformed_target}")
278-
# 策略选择输入
286+
# Strategy for selecting inputs
279287
if strategy == "shuffle":
280288
random.shuffle(valid_inputs)
281289
inputs = valid_inputs[:n_fuzz]
@@ -284,44 +292,47 @@ def substitute_one_repo(
284292
else:
285293
inputs = valid_inputs[:n_fuzz]
286294

287-
# 每个 fuzz input 生成一个单独的文件(使用 AST
295+
# Generate a separate file for each fuzz input (using AST)
288296
for idx, fuzz_input in enumerate(inputs, start=1):
289297
with open(source_file, "r") as f_src:
290298
code = f_src.read()
291299

292300
try:
293-
# 解析为 AST
301+
# Parse into AST
294302
tree = ast.parse(code)
295303

296-
# 应用转换器
304+
# Apply transformer
297305
transformer = TestGenTransformer(idx, fuzz_input)
298306
new_tree = transformer.visit(tree)
299307
ast.fix_missing_locations(new_tree)
300308

301-
# 确保找到并处理了测试函数
309+
# Ensure test function was found and processed
302310
if not transformer.found_test_function:
303311
logging.warning(f"No test_ function found in {source_file}")
304312
continue
305313

306-
# 生成新代码
314+
# Generate new code
307315
new_code = astunparse.unparse(new_tree)
308316

309-
# 使用 transformed_target 而不是 target_name
317+
# Use transformed_target instead of target_name
310318
out_path = pjoin(template_dir, f"{transformed_target}.testgen_{idx}.py")
311319
with open(out_path, "w") as f_out:
312320
f_out.write(new_code)
313321

314-
# 格式化代码
322+
# Format code
315323
try:
316324
subprocess.run(["black", out_path], check=False)
317325
except FileNotFoundError:
318326
logging.warning("Black formatter not found, skipping formatting")
319-
327+
320328
except SyntaxError as e:
321329
logging.error(f"Syntax error when processing {source_file}: {e}")
322330
except Exception as e:
323-
# 使用 transformed_target 而不是 target_name
324-
logging.error(f"Error generating test case for {transformed_target}: {e}")
331+
# Use transformed_target instead of target_name
332+
logging.error(
333+
f"Error generating test case for {transformed_target}: {e}"
334+
)
335+
325336

326337
def testgen_repos(
327338
repos: list[str],
@@ -348,28 +359,29 @@ def testgen_repos(
348359
project_name = os.path.basename(repo)
349360
oss_fuzz_dir = Path(repo).parent.parent
350361
raw_targets = discover_targets(project_name, oss_fuzz_dir)
351-
352-
# 保存原始目标名称和转换后的目标名称
362+
363+
# Save original target names and transformed target names
353364
transformed_targets = [t.replace("_print1", "") for t in raw_targets]
354-
targets = list(zip(transformed_targets, raw_targets)) # (转换后, 原始)
365+
targets = list(zip(transformed_targets, raw_targets)) # (transformed, raw)
355366
target_map[repo] = targets
356367

357368
# Process each repository in parallel
358369
with ProcessingPool(jobs) as p:
359370
list(
360371
p.map(
361372
lambda item: substitute_one_repo(
362-
item[0], # repo path
363-
item[1], # list of (transformed, raw) targets
364-
n_fuzz,
365-
strategy,
366-
max_len,
367-
sim_thresh
373+
item[0], # repo path
374+
item[1], # list of (transformed, raw) targets
375+
n_fuzz,
376+
strategy,
377+
max_len,
378+
sim_thresh,
368379
),
369380
target_map.items(),
370381
)
371382
)
372383

384+
373385
def main(
374386
repo_id: str = "data/valid_projects.txt",
375387
repo_root: str = "fuzz/oss-fuzz/projects/",
@@ -431,4 +443,4 @@ def main(
431443

432444
if __name__ == "__main__":
433445
logging.basicConfig(level=logging.INFO)
434-
fire.Fire(main)
446+
fire.Fire(main)

0 commit comments

Comments
 (0)