Skip to content

Commit 58d1f76

Browse files
committed
testgen need to ^ help: add ; here
1 parent ba61ca1 commit 58d1f76

1 file changed

Lines changed: 181 additions & 28 deletions

File tree

fuzz/collect_fuzz_python.py

Lines changed: 181 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,16 @@
1515
from difflib import SequenceMatcher
1616
from itertools import islice
1717
from datetime import datetime
18+
import re
1819

1920
def build_image(repos: list[str], jobs: int):
20-
"""构建每个仓库对应的OSS-Fuzz项目的Docker镜像"""
21+
"""
22+
构建每个仓库对应的OSS-Fuzz项目的Docker镜像
23+
24+
Args:
25+
repos (list[str]): 仓库路径列表
26+
jobs (int): 并行任务数
27+
"""
2128
logging.info(f"Building Docker images for {len(repos)} OSS-Fuzz projects")
2229
log_dir = os.path.abspath("fuzz_pipeline_log")
2330
os.makedirs(log_dir, exist_ok=True)
@@ -26,6 +33,8 @@ def _build_cmd(path: str):
2633
project_name = os.path.basename(path.rstrip("/"))
2734
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
2835
log_file = os.path.join(log_dir, f"{project_name}_{timestamp}.log")
36+
37+
logging.info(f"Start building {project_name}, logging to {log_file}")
2938
return subprocess.Popen(
3039
f"yes | python3 infra/helper.py build_image {project_name}",
3140
cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -37,7 +46,13 @@ def _build_cmd(path: str):
3746
_ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
3847

3948
def build_fuzzer(repos: list[str], jobs: int):
40-
"""对构建成功的项目并行构建模糊测试器"""
49+
"""
50+
对构建成功的项目并行构建模糊测试器
51+
52+
Args:
53+
repos (list[str]): 仓库路径列表
54+
jobs (int): 并行任务数
55+
"""
4156
logging.info(f"Building fuzzers for {len(repos)} OSS-Fuzz projects")
4257
log_dir = os.path.abspath("fuzz_pipeline_log")
4358
os.makedirs(log_dir, exist_ok=True)
@@ -46,6 +61,8 @@ def _build_cmd(path: str):
4661
project_name = os.path.basename(path.rstrip("/"))
4762
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
4863
log_file = os.path.join(log_dir, f"{project_name}_fuzzer_{timestamp}.log")
64+
65+
logging.info(f"Start building fuzzers for {project_name}, logging to {log_file}")
4966
return subprocess.Popen(
5067
f"python3 infra/helper.py build_fuzzers --sanitizer address {project_name}",
5168
cwd=os.path.abspath(os.path.join(path, "../../")),
@@ -57,7 +74,16 @@ def _build_cmd(path: str):
5774
_ = parallel_subprocess(repos, jobs, _build_cmd, on_exit=None)
5875

5976
def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
60-
"""发现模糊测试目标"""
77+
"""
78+
发现模糊测试目标
79+
80+
Args:
81+
project_name (str): 项目名称
82+
oss_fuzz_dir (Path): OSS-Fuzz根目录
83+
84+
Returns:
85+
list[str]: 目标名称列表
86+
"""
6187
out_dir = oss_fuzz_dir / "build" / "out" / project_name
6288
targets = []
6389

@@ -77,7 +103,16 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
77103
return targets
78104

79105
def fuzz_one_target(target: tuple[str, str], timeout: int):
80-
"""对单个模糊测试目标执行模糊测试"""
106+
"""
107+
对单个模糊测试目标执行模糊测试
108+
109+
Args:
110+
target (tuple[str, str]): (仓库路径, 目标名称)
111+
timeout (int): 超时时间(秒)
112+
113+
Returns:
114+
subprocess.Popen: 子进程对象
115+
"""
81116
repo_path, target_name = target
82117
project_name = os.path.basename(repo_path)
83118
oss_fuzz_root = os.path.dirname(os.path.dirname(repo_path))
@@ -103,7 +138,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
103138
return None
104139

105140
def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
106-
"""对一组仓库执行模糊测试"""
141+
"""
142+
对一组仓库执行模糊测试
143+
144+
Args:
145+
repos (list[str]): 仓库路径列表
146+
jobs (int): 并行任务数
147+
timeout (int): 超时时间(秒)
148+
"""
107149
logging.info("Discovering fuzz targets")
108150

109151
# 获取所有目标
@@ -130,17 +172,26 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
130172
parallel_subprocess(targets, jobs, lambda p: fuzz_one_target(p, timeout), on_exit=None)
131173

132174
def generate_test_template(target_name: str, repo_path: str):
133-
"""为单个目标生成测试模板"""
175+
"""
176+
为单个目标生成测试模板
177+
178+
Args:
179+
target_name (str): 目标名称
180+
repo_path (str): 仓库路径
181+
182+
Returns:
183+
str: 模板文件路径
184+
"""
134185
template_dir = pjoin(repo_path, "tests-gen")
135186
os.makedirs(template_dir, exist_ok=True)
136187
template_path = pjoin(template_dir, f"{target_name}.rs")
137188

138-
# 基本测试模板
189+
# 基本测试模板 - 使用字节数组而不是字节字符
139190
template = f"""
140191
#[test]
141192
fn test_{target_name}() {{
142193
// 测试逻辑将在这里生成
143-
let input = []; // 模糊测试输入将替换这里
194+
let input = b""; // 模糊测试输入将替换这里
144195
let result = process_input(&input);
145196
assert!(result.is_ok());
146197
}}
@@ -152,7 +203,13 @@ def generate_test_template(target_name: str, repo_path: str):
152203
return template_path
153204

154205
def transform_repos(repos: list[str], jobs: int):
155-
"""为所有目标生成测试模板"""
206+
"""
207+
为所有目标生成测试模板
208+
209+
Args:
210+
repos (list[str]): 仓库路径列表
211+
jobs (int): 并行任务数
212+
"""
156213
logging.info("Generating test templates")
157214

158215
def _transform_repo(repo: str):
@@ -164,18 +221,67 @@ def _transform_repo(repo: str):
164221
with ProcessingPool(jobs) as p:
165222
return list(p.map(_transform_repo, repos))
166223

167-
# Placeholder statements that generated templates may contain. The first is
# the current form; the second is the form written by older templates, kept
# so that stale template files on disk are still filled in.
_INPUT_PLACEHOLDERS = (
    'let input = b""; // 模糊测试输入将替换这里',
    'let input = []; // 模糊测试输入将替换这里',
)


def escape_special_chars(input_data: str) -> str:
    """
    Render a fuzz input as a single Rust byte-string literal.

    The input is encoded as UTF-8 and every byte is emitted so that the
    result is always a valid ``b"..."`` expression: printable ASCII is kept
    verbatim, backslash and double quote are backslash-escaped, and every
    other byte (control characters, non-ASCII) becomes a ``\\xNN`` escape.

    Args:
        input_data (str): Raw input text.

    Returns:
        str: A Rust byte-string literal expression, without a trailing
        semicolon and without any newline.
    """
    pieces = []
    for byte in input_data.encode("utf-8"):
        if byte == 0x5C:  # backslash
            pieces.append("\\\\")
        elif byte == 0x22:  # double quote
            pieces.append('\\"')
        elif 0x20 <= byte <= 0x7E:  # printable ASCII
            pieces.append(chr(byte))
        else:
            # Byte-string literals cannot hold raw non-ASCII characters, so
            # anything outside printable ASCII is written as a hex escape.
            pieces.append(f"\\x{byte:02x}")
    return 'b"' + "".join(pieces) + '"'


def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
    """
    Fill a test template with one fuzz input and give the test a unique name.

    Args:
        template (str): Template source containing the input placeholder.
        input_data (str): Fuzz input to embed.
        idx (int): Index of this input, appended to the test function name.
        target_name (str): Fuzz target name used in the test function name.

    Returns:
        str: Test source with the placeholder statement replaced by
        ``let input = <literal>;`` and ``fn test_<target>()`` renamed to
        ``fn test_<target>_<idx>()``.
    """
    # The placeholder is a whole statement, so the replacement must be one
    # too: keep the `let input = ` binding and the terminating semicolon.
    statement = f"let input = {escape_special_chars(input_data)};"

    new_template = template
    for placeholder in _INPUT_PLACEHOLDERS:
        new_template = new_template.replace(placeholder, statement)

    # Rename the test function so several inputs can share one file.
    return new_template.replace(
        f"fn test_{target_name}()",
        f"fn test_{target_name}_{idx}()",
    )
176272

177273
def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
    """
    Report whether a string is close enough to any already-selected string.

    Similarity is ``SequenceMatcher(None, x, y).ratio()`` for each ``y`` in
    ``selected``. The cheaper ``real_quick_ratio()`` and ``quick_ratio()``
    are checked first; both are upper bounds on ``ratio()``, so a candidate
    that fails either one cannot exceed the threshold and the expensive
    comparison is skipped without changing the result.

    Args:
        selected (list[str]): Strings chosen so far.
        x (str): String to check.
        thresh (float): Similarity threshold; a match must be strictly above it.

    Returns:
        bool: True if some selected string has a ratio above ``thresh``.
    """
    for candidate in selected:
        # Argument order is kept as (x, candidate) because ratio() may
        # depend on the order of its two sequences.
        matcher = SequenceMatcher(None, x, candidate)
        if (
            matcher.real_quick_ratio() > thresh
            and matcher.quick_ratio() > thresh
            and matcher.ratio() > thresh
        ):
            return True
    return False
@@ -188,21 +294,45 @@ def substitute_one_repo(
188294
max_len: int,
189295
sim_thresh: float,
190296
):
191-
"""处理单个仓库,将模糊测试输入替换到测试模板中"""
297+
"""
298+
处理单个仓库,将模糊测试输入替换到测试模板中
299+
300+
Args:
301+
repo (str): 仓库路径
302+
targets (list[str]): 目标列表
303+
n_fuzz (int): 使用的输入数量
304+
strategy (str): 选择策略
305+
max_len (int): 最大长度
306+
sim_thresh (float): 相似度阈值
307+
"""
192308
template_dir = pjoin(repo, "tests-gen")
193309
input_dir = pjoin(repo, "fuzz_inputs")
194310

195-
for t in targets:
196-
template_path = pjoin(template_dir, f"{t}.rs")
197-
input_path = pjoin(input_dir, t)
311+
for target_name in targets: # 使用target_name作为循环变量
312+
template_path = pjoin(template_dir, f"{target_name}.rs")
313+
input_path = pjoin(input_dir, target_name)
198314

199315
try:
316+
if not os.path.exists(template_path):
317+
logging.warning(f"Template file not found: {template_path}")
318+
continue
319+
320+
if not os.path.exists(input_path):
321+
logging.warning(f"Input file not found: {input_path}")
322+
continue
323+
200324
with open(template_path) as f_template:
201325
template = f_template.read()
202326

203327
with open(input_path, "r") as f_input:
204-
all_inputs = [i for i in f_input.read().splitlines() if i]
328+
all_inputs = [line.strip() for line in f_input if line.strip()]
205329

330+
if not all_inputs:
331+
logging.warning(f"No valid inputs found for {target_name}")
332+
continue
333+
334+
logging.info(f"Loaded {len(all_inputs)} inputs for {target_name}")
335+
206336
# 选择输入策略
207337
if strategy == "shuffle":
208338
random.shuffle(all_inputs)
@@ -221,20 +351,20 @@ def substitute_one_repo(
221351

222352
# 生成测试用例
223353
tests = [
224-
substitute_input(template, input_data, i)
354+
substitute_input(template, input_data, i, target_name) # 传递target_name
225355
for i, input_data in enumerate(inputs)
226356
]
227357

228358
# 写入生成的测试文件
229-
generated_path = pjoin(template_dir, f"{t}.inputs.rs")
359+
generated_path = pjoin(template_dir, f"{target_name}.inputs.rs")
230360
with open(generated_path, "w") as f:
231361
f.write("\n".join(tests))
232362

233363
# 格式化代码
234364
subprocess.run(["rustfmt", generated_path], check=False)
235365

236366
except Exception as e:
237-
logging.error(f"Error processing {t}: {e}")
367+
logging.error(f"Error processing {target_name}: {e}")
238368

239369
def testgen_repos(
240370
repos: list[str],
@@ -244,7 +374,17 @@ def testgen_repos(
244374
max_len: int = 100,
245375
sim_thresh: float = 0.8,
246376
):
247-
"""从模糊测试输入生成测试用例"""
377+
"""
378+
从模糊测试输入生成测试用例
379+
380+
Args:
381+
repos (list[str]): 仓库路径列表
382+
jobs (int): 并行任务数
383+
n_fuzz (int): 使用的输入数量
384+
strategy (str): 选择策略
385+
max_len (int): 最大长度
386+
sim_thresh (float): 相似度阈值
387+
"""
248388
# 首先获取所有目标
249389
targets_list = []
250390
for repo in repos:
@@ -275,7 +415,20 @@ def main(
275415
max_len: int = 100,
276416
sim_thresh: float = 0.8,
277417
):
278-
"""主函数,控制整个模糊测试流程"""
418+
"""
419+
主函数,控制整个模糊测试流程
420+
421+
Args:
422+
repo_id (str): 项目ID文件路径
423+
repo_root (str): 项目根目录
424+
timeout (int): 超时时间
425+
jobs (int): 并行任务数
426+
pipeline (str): 流程类型
427+
n_fuzz (int): 使用的输入数量
428+
strategy (str): 选择策略
429+
max_len (int): 最大长度
430+
sim_thresh (float): 相似度阈值
431+
"""
279432
try:
280433
with open(repo_id, "r") as f:
281434
repo_id_list = [line.strip() for line in f if line.strip()]

0 commit comments

Comments
 (0)