1919from difflib import SequenceMatcher
2020from itertools import islice
2121from datetime import datetime
22+ import re
23+
24+ # Import AST-related functionality
25+ from ast_utils import (
26+ TestFunctionTransformer ,
27+ TestGenTransformer ,
28+ generate_test_template ,
29+ )
2230
23- # 导入AST相关的功能
24- from ast_utils import TestFunctionTransformer , TestGenTransformer , generate_test_template
2531
2632def build_image (repos : list [str ], jobs : int ):
2733 """
@@ -200,22 +206,23 @@ def _transform_repo(repo: str):
200206 project_name = os .path .basename (repo )
201207 oss_fuzz_dir = Path (repo ).parent .parent
202208 raw_targets = discover_targets (project_name , oss_fuzz_dir )
203-
204- # 只需移除目标名称中的 "_print1",不要添加任何新后缀
209+
210+ # Simply remove "_print1" from target names, don't add any new suffix
205211 transformed_targets = [t .replace ("_print1" , "" ) for t in raw_targets ]
206-
207- # 去重
212+
213+ # Remove duplicates
208214 targets = list (set (transformed_targets ))
209-
210- # 传递给 generate_test_template 的是简单目标名称
215+
216+ # Pass simple target names to generate_test_template
211217 return [generate_test_template (t , repo ) for t in targets ]
212218
213219 with ProcessingPool (jobs ) as p :
214220 return list (p .map (_transform_repo , repos ))
215221
222+
216223def substitute_one_repo (
217224 repo : str ,
218- targets : list [tuple ], # 每个元素是 (transformed_target, raw_target)
225+ targets : list [tuple ], # Each element is (transformed_target, raw_target)
219226 n_fuzz : int ,
220227 strategy : str ,
221228 max_len : int ,
@@ -225,57 +232,58 @@ def substitute_one_repo(
225232 Copy files from fuzz target template and generate multiple testgen files based on fuzz inputs
226233 using AST transformations
227234 """
235+
228236 input_dir = pjoin (repo , "fuzz_inputs" )
229237 template_dir = pjoin (repo , "tests-gen" )
230238 os .makedirs (template_dir , exist_ok = True )
231239
232240 for transformed_target , raw_target in targets :
233- # 使用转换后的目标名称构建模板文件路径
241+ # Build template file path using transformed target name
234242 source_file = pjoin (template_dir , transformed_target + ".py" )
235-
236- # 使用原始目标名称构建输入文件路径
243+
244+ # Build input file path using raw target name
237245 input_path = pjoin (input_dir , raw_target )
238-
239- # 确保源文件存在
246+
247+ # Ensure source file exists
240248 if not os .path .exists (source_file ):
241249 logging .warning (f"Source file not found: { source_file } " )
242250 continue
243251 if not os .path .exists (input_path ):
244252 logging .warning (f"Input file not found: { input_path } " )
245253 continue
246-
247- # 读取所有有效的输入数据
254+
255+ # Read all valid input data
248256 valid_inputs = []
249257 with open (input_path , "rb" ) as f_input :
250258 lines = f_input .readlines ()
251- # 文件已关闭,现在处理数据
259+ # File is closed, now process data
252260 for line in lines :
253- # 使用 errors='replace' 确保解码不会失败
261+ # Use errors='replace' to ensure decoding doesn't fail
254262 decoded = line .decode ("utf-8" , errors = "replace" )
255-
256- # 只处理以 b' 或 b" 开头的行
263+
264+ # Only process lines starting with b' or b"
257265 if decoded .startswith (("b'" , 'b"' )):
258266 if decoded .startswith ("b'" ) and decoded .endswith ("'\n " ):
259267 byte_data = line [2 :- 2 ]
260268 elif decoded .startswith ('b"' ) and decoded .endswith ('"\n ' ):
261269 byte_data = line [2 :- 2 ]
262270 else :
263271 continue
264-
272+
265273 if 0 < len (byte_data ) <= max_len :
266274 valid_inputs .append (byte_data )
267- # 对于其他行,如果长度在范围内且不是以 b' 或 b" 开头,也考虑加入
275+ # Lines that do not start with b' or b" are kept as raw bytes when their length is within max_len
268276 elif 0 < len (line ) <= max_len :
269277 valid_inputs .append (line )
270278
271279 if not valid_inputs :
272- # 使用 transformed_target 而不是 target_name
280+ # Use transformed_target instead of target_name
273281 logging .warning (f"No valid inputs found for { transformed_target } " )
274282 continue
275283
276- # 使用 transformed_target 而不是 target_name
284+ # Use transformed_target instead of target_name
277285 logging .info (f"Loaded { len (valid_inputs )} inputs for { transformed_target } " )
278- # 策略选择输入
286+ # Strategy for selecting inputs
279287 if strategy == "shuffle" :
280288 random .shuffle (valid_inputs )
281289 inputs = valid_inputs [:n_fuzz ]
@@ -284,44 +292,47 @@ def substitute_one_repo(
284292 else :
285293 inputs = valid_inputs [:n_fuzz ]
286294
287- # 每个 fuzz input 生成一个单独的文件(使用 AST)
295+ # Generate a separate file for each fuzz input (using AST)
288296 for idx , fuzz_input in enumerate (inputs , start = 1 ):
289297 with open (source_file , "r" ) as f_src :
290298 code = f_src .read ()
291299
292300 try :
293- # 解析为 AST
301+ # Parse into AST
294302 tree = ast .parse (code )
295303
296- # 应用转换器
304+ # Apply transformer
297305 transformer = TestGenTransformer (idx , fuzz_input )
298306 new_tree = transformer .visit (tree )
299307 ast .fix_missing_locations (new_tree )
300308
301- # 确保找到并处理了测试函数
309+ # Ensure test function was found and processed
302310 if not transformer .found_test_function :
303311 logging .warning (f"No test_ function found in { source_file } " )
304312 continue
305313
306- # 生成新代码
314+ # Generate new code
307315 new_code = astunparse .unparse (new_tree )
308316
309- # 使用 transformed_target 而不是 target_name
317+ # Use transformed_target instead of target_name
310318 out_path = pjoin (template_dir , f"{ transformed_target } .testgen_{ idx } .py" )
311319 with open (out_path , "w" ) as f_out :
312320 f_out .write (new_code )
313321
314- # 格式化代码
322+ # Format code
315323 try :
316324 subprocess .run (["black" , out_path ], check = False )
317325 except FileNotFoundError :
318326 logging .warning ("Black formatter not found, skipping formatting" )
319-
327+
320328 except SyntaxError as e :
321329 logging .error (f"Syntax error when processing { source_file } : { e } " )
322330 except Exception as e :
323- # 使用 transformed_target 而不是 target_name
324- logging .error (f"Error generating test case for { transformed_target } : { e } " )
331+ # Use transformed_target instead of target_name
332+ logging .error (
333+ f"Error generating test case for { transformed_target } : { e } "
334+ )
335+
325336
326337def testgen_repos (
327338 repos : list [str ],
@@ -348,28 +359,29 @@ def testgen_repos(
348359 project_name = os .path .basename (repo )
349360 oss_fuzz_dir = Path (repo ).parent .parent
350361 raw_targets = discover_targets (project_name , oss_fuzz_dir )
351-
352- # 保存原始目标名称和转换后的目标名称
362+
363+ # Save original target names and transformed target names
353364 transformed_targets = [t .replace ("_print1" , "" ) for t in raw_targets ]
354- targets = list (zip (transformed_targets , raw_targets )) # (转换后, 原始 )
365+ targets = list (zip (transformed_targets , raw_targets )) # (transformed, raw )
355366 target_map [repo ] = targets
356367
357368 # Process each repository in parallel
358369 with ProcessingPool (jobs ) as p :
359370 list (
360371 p .map (
361372 lambda item : substitute_one_repo (
362- item [0 ], # repo path
363- item [1 ], # list of (transformed, raw) targets
364- n_fuzz ,
365- strategy ,
366- max_len ,
367- sim_thresh
373+ item [0 ], # repo path
374+ item [1 ], # list of (transformed, raw) targets
375+ n_fuzz ,
376+ strategy ,
377+ max_len ,
378+ sim_thresh ,
368379 ),
369380 target_map .items (),
370381 )
371382 )
372383
384+
373385def main (
374386 repo_id : str = "data/valid_projects.txt" ,
375387 repo_root : str = "fuzz/oss-fuzz/projects/" ,
@@ -431,4 +443,4 @@ def main(
431443
432444if __name__ == "__main__" :
433445 logging .basicConfig (level = logging .INFO )
434- fire .Fire (main )
446+ fire .Fire (main )
0 commit comments