1515from difflib import SequenceMatcher
1616from itertools import islice
1717from datetime import datetime
18+ import re
1819
1920def build_image (repos : list [str ], jobs : int ):
20- """构建每个仓库对应的OSS-Fuzz项目的Docker镜像"""
21+ """
22+ 构建每个仓库对应的OSS-Fuzz项目的Docker镜像
23+
24+ Args:
25+ repos (list[str]): 仓库路径列表
26+ jobs (int): 并行任务数
27+ """
2128 logging .info (f"Building Docker images for { len (repos )} OSS-Fuzz projects" )
2229 log_dir = os .path .abspath ("fuzz_pipeline_log" )
2330 os .makedirs (log_dir , exist_ok = True )
@@ -26,6 +33,8 @@ def _build_cmd(path: str):
2633 project_name = os .path .basename (path .rstrip ("/" ))
2734 timestamp = datetime .now ().strftime ("%Y%m%d_%H%M%S" )
2835 log_file = os .path .join (log_dir , f"{ project_name } _{ timestamp } .log" )
36+
37+ logging .info (f"Start building { project_name } , logging to { log_file } " )
2938 return subprocess .Popen (
3039 f"yes | python3 infra/helper.py build_image { project_name } " ,
3140 cwd = os .path .abspath (os .path .join (path , "../../" )),
@@ -37,7 +46,13 @@ def _build_cmd(path: str):
3746 _ = parallel_subprocess (repos , jobs , _build_cmd , on_exit = None )
3847
3948def build_fuzzer (repos : list [str ], jobs : int ):
40- """对构建成功的项目并行构建模糊测试器"""
49+ """
50+ 对构建成功的项目并行构建模糊测试器
51+
52+ Args:
53+ repos (list[str]): 仓库路径列表
54+ jobs (int): 并行任务数
55+ """
4156 logging .info (f"Building fuzzers for { len (repos )} OSS-Fuzz projects" )
4257 log_dir = os .path .abspath ("fuzz_pipeline_log" )
4358 os .makedirs (log_dir , exist_ok = True )
@@ -46,6 +61,8 @@ def _build_cmd(path: str):
4661 project_name = os .path .basename (path .rstrip ("/" ))
4762 timestamp = datetime .now ().strftime ("%Y%m%d_%H%M%S" )
4863 log_file = os .path .join (log_dir , f"{ project_name } _fuzzer_{ timestamp } .log" )
64+
65+ logging .info (f"Start building fuzzers for { project_name } , logging to { log_file } " )
4966 return subprocess .Popen (
5067 f"python3 infra/helper.py build_fuzzers --sanitizer address { project_name } " ,
5168 cwd = os .path .abspath (os .path .join (path , "../../" )),
@@ -57,7 +74,16 @@ def _build_cmd(path: str):
5774 _ = parallel_subprocess (repos , jobs , _build_cmd , on_exit = None )
5875
5976def discover_targets (project_name : str , oss_fuzz_dir : Path ) -> list [str ]:
60- """发现模糊测试目标"""
77+ """
78+ 发现模糊测试目标
79+
80+ Args:
81+ project_name (str): 项目名称
82+ oss_fuzz_dir (Path): OSS-Fuzz根目录
83+
84+ Returns:
85+ list[str]: 目标名称列表
86+ """
6187 out_dir = oss_fuzz_dir / "build" / "out" / project_name
6288 targets = []
6389
@@ -77,7 +103,16 @@ def discover_targets(project_name: str, oss_fuzz_dir: Path) -> list[str]:
77103 return targets
78104
79105def fuzz_one_target (target : tuple [str , str ], timeout : int ):
80- """对单个模糊测试目标执行模糊测试"""
106+ """
107+ 对单个模糊测试目标执行模糊测试
108+
109+ Args:
110+ target (tuple[str, str]): (仓库路径, 目标名称)
111+ timeout (int): 超时时间(秒)
112+
113+ Returns:
114+ subprocess.Popen: 子进程对象
115+ """
81116 repo_path , target_name = target
82117 project_name = os .path .basename (repo_path )
83118 oss_fuzz_root = os .path .dirname (os .path .dirname (repo_path ))
@@ -103,7 +138,14 @@ def fuzz_one_target(target: tuple[str, str], timeout: int):
103138 return None
104139
105140def fuzz_repos (repos : list [str ], jobs : int , timeout : int = 60 ):
106- """对一组仓库执行模糊测试"""
141+ """
142+ 对一组仓库执行模糊测试
143+
144+ Args:
145+ repos (list[str]): 仓库路径列表
146+ jobs (int): 并行任务数
147+ timeout (int): 超时时间(秒)
148+ """
107149 logging .info ("Discovering fuzz targets" )
108150
109151 # 获取所有目标
@@ -130,17 +172,26 @@ def fuzz_repos(repos: list[str], jobs: int, timeout: int = 60):
130172 parallel_subprocess (targets , jobs , lambda p : fuzz_one_target (p , timeout ), on_exit = None )
131173
132174def generate_test_template (target_name : str , repo_path : str ):
133- """为单个目标生成测试模板"""
175+ """
176+ 为单个目标生成测试模板
177+
178+ Args:
179+ target_name (str): 目标名称
180+ repo_path (str): 仓库路径
181+
182+ Returns:
183+ str: 模板文件路径
184+ """
134185 template_dir = pjoin (repo_path , "tests-gen" )
135186 os .makedirs (template_dir , exist_ok = True )
136187 template_path = pjoin (template_dir , f"{ target_name } .rs" )
137188
138- # 基本测试模板
189+ # 基本测试模板 - 使用字节数组而不是字节字符
139190 template = f"""
140191 #[test]
141192 fn test_{ target_name } () {{
142193 // 测试逻辑将在这里生成
143- let input = [] ; // 模糊测试输入将替换这里
194+ let input = b"" ; // 模糊测试输入将替换这里
144195 let result = process_input(&input);
145196 assert!(result.is_ok());
146197 }}
@@ -152,7 +203,13 @@ def generate_test_template(target_name: str, repo_path: str):
152203 return template_path
153204
154205def transform_repos (repos : list [str ], jobs : int ):
155- """为所有目标生成测试模板"""
206+ """
207+ 为所有目标生成测试模板
208+
209+ Args:
210+ repos (list[str]): 仓库路径列表
211+ jobs (int): 并行任务数
212+ """
156213 logging .info ("Generating test templates" )
157214
158215 def _transform_repo (repo : str ):
@@ -164,18 +221,67 @@ def _transform_repo(repo: str):
164221 with ProcessingPool (jobs ) as p :
165222 return list (p .map (_transform_repo , repos ))
166223
167- def substitute_input (template : str , input_data : str , idx : int ) -> str :
168- """将模糊测试输入替换到测试模板中"""
169- return template .replace (
170- 'let input = []; // 模糊测试输入将替换这里' ,
171- f"let input = { input_data } ;"
172- ).replace (
173- f"fn test_" ,
174- f"fn test_{ idx } _"
def escape_special_chars(input_data: str) -> str:
    """
    Render a fuzz input as a single Rust byte-string literal.

    The input is encoded as UTF-8 and every byte is emitted so that the
    result is always one valid ``b"..."`` expression: backslashes and double
    quotes are backslash-escaped, printable ASCII is kept verbatim, and every
    other byte (control characters and the bytes of non-ASCII characters) is
    written as a ``\\xNN`` escape.

    Args:
        input_data (str): Raw fuzz input.

    Returns:
        str: A Rust byte-string literal such as ``b"abc"`` or ``b"\\xc3\\xa9"``.
    """
    pieces = []
    for byte in input_data.encode("utf-8"):
        if byte == 0x5C:  # backslash
            pieces.append("\\\\")
        elif byte == 0x22:  # double quote
            pieces.append('\\"')
        elif 0x20 <= byte <= 0x7E:  # printable ASCII is legal as-is
            pieces.append(chr(byte))
        else:
            # Rust byte strings reject raw non-ASCII characters, so spell the
            # byte out explicitly instead of switching to another expression
            # type for this one case.
            pieces.append(f"\\x{byte:02x}")
    return 'b"' + "".join(pieces) + '"'


def substitute_input(template: str, input_data: str, idx: int, target_name: str) -> str:
    """
    Substitute one fuzz input into a test template.

    The placeholder line of the template is replaced by a complete
    ``let input = <literal>;`` statement, and the test function is renamed
    from ``test_<target_name>`` to ``test_<target_name>_<idx>`` so that
    several generated tests can live in the same file.

    Args:
        template (str): Template source text.
        input_data (str): Fuzz input to embed.
        idx (int): Index of the generated test, appended to the function name.
        target_name (str): Fuzz target name used in the template's function name.

    Returns:
        str: The test source with the input and the unique function name applied.
    """
    literal = escape_special_chars(input_data)

    # The placeholder must match the template text exactly, so it is kept
    # verbatim. The replacement re-emits the whole statement: swapping in only
    # the literal would drop the `let input =` binding and its semicolon.
    with_input = template.replace(
        'let input = b""; // 模糊测试输入将替换这里',
        f"let input = {literal};",
    )

    # Give every generated test a distinct function name.
    return with_input.replace(
        f"fn test_{target_name}()",
        f"fn test_{target_name}_{idx}()",
    )
176272
def has_similar(selected: list[str], x: str, thresh: float = 0.8) -> bool:
    """
    Check whether a string is similar enough to any already-selected string.

    Similarity is ``difflib.SequenceMatcher(None, x, y).ratio()``; a candidate
    counts as similar when that ratio is strictly greater than ``thresh``.

    Args:
        selected (list[str]): Strings that have already been selected.
        x (str): String to check.
        thresh (float): Similarity threshold (exclusive).

    Returns:
        bool: True if at least one selected string exceeds the threshold.
    """
    for candidate in selected:
        matcher = SequenceMatcher(None, x, candidate)
        # real_quick_ratio() and quick_ratio() are cheap upper bounds on
        # ratio(); if either is not above the threshold, ratio() cannot be
        # either, so the expensive computation is skipped without changing
        # the result.
        if (
            matcher.real_quick_ratio() > thresh
            and matcher.quick_ratio() > thresh
            and matcher.ratio() > thresh
        ):
            return True
    return False
@@ -188,21 +294,45 @@ def substitute_one_repo(
188294 max_len : int ,
189295 sim_thresh : float ,
190296):
191- """处理单个仓库,将模糊测试输入替换到测试模板中"""
297+ """
298+ 处理单个仓库,将模糊测试输入替换到测试模板中
299+
300+ Args:
301+ repo (str): 仓库路径
302+ targets (list[str]): 目标列表
303+ n_fuzz (int): 使用的输入数量
304+ strategy (str): 选择策略
305+ max_len (int): 最大长度
306+ sim_thresh (float): 相似度阈值
307+ """
192308 template_dir = pjoin (repo , "tests-gen" )
193309 input_dir = pjoin (repo , "fuzz_inputs" )
194310
195- for t in targets :
196- template_path = pjoin (template_dir , f"{ t } .rs" )
197- input_path = pjoin (input_dir , t )
311+ for target_name in targets : # 使用target_name作为循环变量
312+ template_path = pjoin (template_dir , f"{ target_name } .rs" )
313+ input_path = pjoin (input_dir , target_name )
198314
199315 try :
316+ if not os .path .exists (template_path ):
317+ logging .warning (f"Template file not found: { template_path } " )
318+ continue
319+
320+ if not os .path .exists (input_path ):
321+ logging .warning (f"Input file not found: { input_path } " )
322+ continue
323+
200324 with open (template_path ) as f_template :
201325 template = f_template .read ()
202326
203327 with open (input_path , "r" ) as f_input :
204- all_inputs = [i for i in f_input . read (). splitlines () if i ]
328+ all_inputs = [line . strip () for line in f_input if line . strip () ]
205329
330+ if not all_inputs :
331+ logging .warning (f"No valid inputs found for { target_name } " )
332+ continue
333+
334+ logging .info (f"Loaded { len (all_inputs )} inputs for { target_name } " )
335+
206336 # 选择输入策略
207337 if strategy == "shuffle" :
208338 random .shuffle (all_inputs )
@@ -221,20 +351,20 @@ def substitute_one_repo(
221351
222352 # 生成测试用例
223353 tests = [
224- substitute_input (template , input_data , i )
354+ substitute_input (template , input_data , i , target_name ) # 传递target_name
225355 for i , input_data in enumerate (inputs )
226356 ]
227357
228358 # 写入生成的测试文件
229- generated_path = pjoin (template_dir , f"{ t } .inputs.rs" )
359+ generated_path = pjoin (template_dir , f"{ target_name } .inputs.rs" )
230360 with open (generated_path , "w" ) as f :
231361 f .write ("\n " .join (tests ))
232362
233363 # 格式化代码
234364 subprocess .run (["rustfmt" , generated_path ], check = False )
235365
236366 except Exception as e :
237- logging .error (f"Error processing { t } : { e } " )
367+ logging .error (f"Error processing { target_name } : { e } " )
238368
239369def testgen_repos (
240370 repos : list [str ],
@@ -244,7 +374,17 @@ def testgen_repos(
244374 max_len : int = 100 ,
245375 sim_thresh : float = 0.8 ,
246376):
247- """从模糊测试输入生成测试用例"""
377+ """
378+ 从模糊测试输入生成测试用例
379+
380+ Args:
381+ repos (list[str]): 仓库路径列表
382+ jobs (int): 并行任务数
383+ n_fuzz (int): 使用的输入数量
384+ strategy (str): 选择策略
385+ max_len (int): 最大长度
386+ sim_thresh (float): 相似度阈值
387+ """
248388 # 首先获取所有目标
249389 targets_list = []
250390 for repo in repos :
@@ -275,7 +415,20 @@ def main(
275415 max_len : int = 100 ,
276416 sim_thresh : float = 0.8 ,
277417):
278- """主函数,控制整个模糊测试流程"""
418+ """
419+ 主函数,控制整个模糊测试流程
420+
421+ Args:
422+ repo_id (str): 项目ID文件路径
423+ repo_root (str): 项目根目录
424+ timeout (int): 超时时间
425+ jobs (int): 并行任务数
426+ pipeline (str): 流程类型
427+ n_fuzz (int): 使用的输入数量
428+ strategy (str): 选择策略
429+ max_len (int): 最大长度
430+ sim_thresh (float): 相似度阈值
431+ """
279432 try :
280433 with open (repo_id , "r" ) as f :
281434 repo_id_list = [line .strip () for line in f if line .strip ()]
0 commit comments