55
66import subprocess
77import logging
8-
8+ from tqdm import tqdm
99import pandas as pd
1010
1111class RunBugRun (Benchmark ):
@@ -25,32 +25,68 @@ def initialize(self) -> None:
2525 logging .info ("Initializing RunBugRun benchmark..." )
2626
2727 python_path = Path (self .get_path (), 'python_valid0.jsonl' )
28- # test_path = Path(self.get_path(), 'tests_all.jsonl')
28+ test_path = Path (self .get_path (), 'tests_all.jsonl' )
2929
3030 python_df = pd .read_json (python_path , lines = True ).set_index ('problem_id' )
31-
31+ test_df = pd .read_json (test_path , lines = True ).set_index ('id' )
32+
33+ subprocess .run (
34+ f"mkdir -p { self .path } /buggy" ,
35+ shell = True ,
36+ capture_output = True ,
37+ check = True ,
38+ )
39+
40+ subprocess .run (
41+ f"mkdir -p { self .path } /fixed" ,
42+ shell = True ,
43+ capture_output = True ,
44+ check = True ,
45+ )
46+
47+ buggy_submissions = python_df .drop_duplicates (subset = ['buggy_submission_id' ]).head (10 )
48+
3249 for prob_id , (buggy_submission_id , buggy_code , fixed_submission_id , fixed_code ) \
33- in python_df .drop_duplicates (subset = ['buggy_submission_id' ])[
34- ['buggy_submission_id' ,'buggy_code' , 'fixed_submission_id' , 'fixed_code' ]
35- ].iterrows ():
36-
37- buggy_file = Path (self .path , f'{ prob_id } _{ buggy_submission_id } .py' )
38- fixed_file = Path (self .path , f'{ prob_id } _{ fixed_submission_id } .py' )
50+ in tqdm (
51+ buggy_submissions [['buggy_submission_id' ,'buggy_code' , 'fixed_submission_id' , 'fixed_code' ]].iterrows (),
52+ total = len (buggy_submissions )
53+ ):
54+
55+ buggy_file = Path (self .path , 'buggy' , f'{ prob_id } _{ buggy_submission_id } .py' )
56+ fixed_file = Path (self .path , 'fixed' , f'{ prob_id } _{ buggy_submission_id } .py' ) # using buggy id for both to maintain file correspondence
57+
58+ with open (buggy_file , 'w' ) as f :
59+ f .write (buggy_code )
60+ f .write ('\n ' )
61+
62+ with open (fixed_file , 'w' ) as f :
63+ f .write (fixed_code )
64+ f .write ('\n ' )
3965
4066 run = subprocess .run (
4167 f"""cd { self .get_path ()} &&
42- echo '''{ buggy_code } ''' > { buggy_file } &&
43- echo '''{ fixed_code } ''' > { fixed_file } &&
4468 diff --unified { fixed_file .relative_to (self .path )} { buggy_file .relative_to (self .path )} """ ,
4569 shell = True ,
4670 capture_output = True
4771 )
48- if run .returncode :
49- print (run )
5072
5173 diff = PatchSet (run .stdout .decode ("utf-8" ))
5274 # Change the source file path to point to the buggy version
5375 diff [0 ].source_file = f"{ buggy_file .relative_to (self .path )} "
76+
77+ failing_tests = {}
78+
79+ for test_id , (test_input , test_output ) in test_df [test_df .problem_id == prob_id ][['input' , 'output' ]].iterrows ():
80+ error_code , result = RunBugRunBug .execute_test_case (buggy_file , test_input )
81+
82+ if error_code :
83+ cause = f"""Function with input { test_input .replace ('"' , "'" )} failed with error: { result } """
84+ elif result != test_output .strip ():
85+ cause = f"""Expected function with input { test_input .replace ('"' , "'" )} to output { test_output .replace ('"' , "'" ).replace ("'" , r"\'" )} but got { result } """
86+ else :
87+ continue # skip passing
88+
89+ failing_tests [f"""{ test_input } -> { test_output } """ ] = cause
5490
55- self .add_bug (RunBugRunBug (self , f"{ prob_id } _{ buggy_submission_id } " , str (diff )))
91+ self .add_bug (RunBugRunBug (self , f"{ prob_id } _{ buggy_submission_id } " , str (diff ), failing_tests ))
5692
0 commit comments