Skip to content

Commit 737e2b6

Browse files
committed
fix pylint for evaluation
1 parent e31897a commit 737e2b6

10 files changed

Lines changed: 107 additions & 258 deletions

File tree

.github/workflows/pylint.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ on: [pull_request]
55
jobs:
66
build:
77
runs-on: ubuntu-latest
8-
strategy:
9-
matrix:
10-
python-version: ["3.10.12"]
8+
119
steps:
1210
- uses: actions/checkout@v2
1311
with:

evaluation/ablation_n_fuzz.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import matplotlib.pyplot as plt
2-
import pandas as pd
3-
import numpy as np
42
import fire
53

64
plt.style.use("_mpl-gallery")
@@ -10,6 +8,7 @@
108

119

1210
def barplot(ns, xs, name):
11+
"""make a bar plot for n_fuzz experiment"""
1312
plt.figure(figsize=(12, 6))
1413
plt.bar(ns, xs, color="skyblue", width=10)
1514
plt.plot(ns, xs, color="red", marker="o")
@@ -22,6 +21,7 @@ def barplot(ns, xs, name):
2221

2322

2423
def main():
24+
"""script to make RQ4 plots"""
2525
ns = [20 * i for i in range(7)]
2626
cov = [17.64, 20.94, 18.69, 19.39, 19.31, 21.69, 21.50]
2727
acc = [34.94, 35.85, 36.93, 37.01, 37.93, 37.44, 39.21]

evaluation/compile.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from typing import Iterable
2-
import fire
32
import os
4-
from tree_sitter.binding import Node
3+
4+
from tree_sitter import Node
5+
from funcy import mapcat
6+
57
from UniTSyn.frontend.parser import RUST_LANGUAGE
68
from UniTSyn.frontend.parser.ast_util import ASTUtil
7-
from funcy import mapcat
89

910

1011
def flatten_use_delc(use_delc_code: str) -> list[str]:
@@ -84,7 +85,7 @@ def collect_rs_files(root: str):
8485
yield os.path.join(dirpath, filename)
8586

8687

87-
def construct_use_delcs(workspace_dir: str, type: str) -> set[str]:
88+
def construct_use_delcs(workspace_dir: str, test_type: str) -> set[str]:
8889
"""construct a set of unique use_list for a project from all use declarations in
8990
a subdirectory to
9091
@@ -93,12 +94,12 @@ def construct_use_delcs(workspace_dir: str, type: str) -> set[str]:
9394
9495
Args:
9596
workspace_dir (str): path to project's workdir
96-
type (str): tests or fuzz to collect use_delcs.
97+
test_type (str): tests or fuzz to collect use_delcs.
9798
9899
Returns:
99100
set[str]: set of use declarations to write to generated test files
100101
"""
101-
subdir = os.path.join(workspace_dir, type)
102+
subdir = os.path.join(workspace_dir, test_type)
102103

103104
def get_use_list_from_file(fpath: str) -> Iterable[str]:
104105
with open(fpath) as f:
@@ -129,15 +130,3 @@ def write_tests_to_workspace(workspace_dir: str, tests: list[str], test_type: st
129130
p = os.path.join(workspace_dir, "tests", f"generated_{test_type}_{i}.rs")
130131
with open(p, "w") as f:
131132
f.write(use_delc + "\n\n" + "#[test]\n" + test)
132-
133-
134-
def main():
135-
workspace_dir = os.path.abspath(
136-
"data/repos/marshallpierce-rust-base64/marshallpierce-rust-base64-4ef33cc"
137-
)
138-
139-
print(construct_use_delcs(workspace_dir, "tests"))
140-
141-
142-
if __name__ == "__main__":
143-
fire.Fire(main)

evaluation/coverage.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,27 @@
22

33
import json
44
from typing import Iterable
5-
import fire
65
import os
76
import subprocess
8-
from returns.maybe import Maybe, Some, Nothing
7+
8+
from funcy import mapcat
9+
from returns.maybe import Nothing
910
from returns.result import Result, Success, Failure
1011
from UniTSyn.frontend.parser import RUST_LANGUAGE
1112
from UniTSyn.frontend.parser.ast_util import ASTUtil
1213
from UniTSyn.frontend.rust.rust_util import get_test_functions
1314
from UniTSyn.frontend.rust.collect_all import collect_test_files
1415
from evaluation.util import CovError
15-
from funcy import mapcat
1616

1717

1818
def clean_workspace(workspace_dir: str):
19+
"""clean-up a workspace by removing coverage script and target dir"""
1920
subprocess.run(["rm", "rust_test_coverage.sh"], cwd=workspace_dir)
2021
subprocess.run(["rm", "-r", "target"], cwd=workspace_dir)
2122

2223

2324
def init_workspace(workspace_dir: str):
25+
"""initialize a workspace by copying coverage script"""
2426
fuzz_aug_home = os.environ["FUZZ_AUG_HOME"]
2527
cov_script_path = f"{fuzz_aug_home}/evaluation/rust_test_coverage.sh"
2628
subprocess.run(["cp", cov_script_path, workspace_dir])
@@ -29,6 +31,7 @@ def init_workspace(workspace_dir: str):
2931
def get_coverage(
3032
workspace_dir: str, test_target: str, clean_run: bool = False, timeout: int = 60
3133
) -> Result[float, CovError]:
34+
"""get coverage of a specific test target in the project"""
3235
if clean_run:
3336
clean_workspace(workspace_dir)
3437

@@ -105,15 +108,3 @@ def get_testcase_coverages(workspace_dir: str) -> dict[str, float]:
105108
cov = get_coverage(workspace_dir, test_name).unwrap()
106109
coverages[test_name] = cov
107110
return coverages
108-
109-
110-
def main():
111-
workspace_dir = os.path.abspath(
112-
"data/repos/marshallpierce-rust-base64/marshallpierce-rust-base64-4ef33cc"
113-
)
114-
115-
print(get_testcase_coverages(workspace_dir))
116-
117-
118-
if __name__ == "__main__":
119-
fire.Fire(main)

evaluation/humaneval.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,51 @@
11
import json
2+
import os
3+
import re
4+
import tempfile
5+
import logging
6+
from multiprocessing import cpu_count
7+
from typing import Iterable
8+
29
import fire
310
from funcy import func_partial
411
from funcy_chain import Chain
5-
from functools import reduce
612
from dacite import from_dict
7-
from returns.result import Result, Success, Failure
8-
import os
9-
import re
10-
import tempfile
13+
from returns.result import Success, Failure
1114
from tqdm import tqdm
1215
import pandas as pd
13-
import logging
14-
from multiprocessing import cpu_count
1516
from pathos.multiprocessing import ProcessingPool
16-
from returns.result import Result, Success, Failure
17-
from typing import Iterable
18-
from evaluation.util import CovError, HumanEvalTask
17+
18+
from evaluation.util import HumanEvalTask
1919
from evaluation.result_analysis import to_record
2020

2121

2222
def init_humaneval_x_workspace(tmpdir: str = "tmp"):
23+
"""setup a temporary workspace for compiling and running humaneval-x programs"""
2324
fuzz_aug_home = os.environ["FUZZ_AUG_HOME"]
2425
cargo_files = f"{fuzz_aug_home}/evaluation/cargo/Cargo.*"
2526
os.system(f"cp {cargo_files} {tmpdir}/")
2627
os.mkdir(f"{tmpdir}/src")
2728

2829

2930
def evaluate_program(program: str, fn_name: str, timeout: int):
30-
from evaluation.coverage import get_coverage
31+
"""write, compile, and run a HumanEval-X program, return coverage result"""
32+
from evaluation.coverage import ( # pylint: disable=import-outside-toplevel
33+
get_coverage,
34+
)
35+
36+
with tempfile.TemporaryDirectory() as tmpdir_path:
3137

32-
tmpdir = tempfile.TemporaryDirectory()
33-
tmpdir_path = tmpdir.name
38+
init_humaneval_x_workspace(tmpdir=tmpdir_path)
39+
test_target = f"test_{fn_name}"
40+
with open(f"{tmpdir_path}/src/main.rs", "w") as fp:
41+
fp.write(program)
42+
cov = get_coverage(tmpdir_path, test_target, timeout=timeout)
3443

35-
init_humaneval_x_workspace(tmpdir=tmpdir_path)
36-
test_target = f"test_{fn_name}"
37-
with open(f"{tmpdir_path}/src/main.rs", "w") as fp:
38-
fp.write(program)
39-
cov = get_coverage(tmpdir_path, test_target, timeout=timeout)
40-
tmpdir.cleanup()
41-
return cov
44+
return cov
4245

4346

4447
def evaluate_assertions(solution: HumanEvalTask, timeout: int = 60):
48+
"""evaluate individual assertions in the generated test function"""
4549
assert solution.fn_name is not None
4650
return [
4751
evaluate_program(p, solution.fn_name, timeout)
@@ -92,17 +96,20 @@ def extract_assertions(test_function: str) -> list[str]:
9296

9397

9498
def concat_assertions_to_test(solution, assertions):
99+
"""concatenate multiple extracted assertions into a single test function"""
95100
return solution.assertion_to_program("\n".join(assertions))
96101

97102

98103
def generated_assertions(sol: HumanEvalTask) -> list[str]:
104+
"""extract assertions from the generated test function"""
99105
assert sol.generated_test is not None
100106
return extract_assertions(sol.test_prompt_header + sol.generated_test)
101107

102108

103109
def evaluate_whole_function_coverage(solution: HumanEvalTask, timeout: int = 60):
104110
"""
105-
Evaluate the whole function coverage using the combined test function of correct assertions, which is similar to evaluate_assertion
111+
Evaluate the whole function coverage using the combined test function of correct assertions,
112+
which is similar to evaluate_assertions
106113
"""
107114
assert solution.fn_name is not None
108115

@@ -120,8 +127,8 @@ def main(
120127
"""evaluate coverage on HumanEval-X
121128
122129
Args:
123-
input_human_eval_x_path (str, optional): input path. Defaults to "data/humaneval_rust.jsonl".
124-
output_result_file (str, optional): write to write results. Defaults to "humaneval_rust_coverage.jsonl".
130+
input_human_eval_x_path (str, optional): input path.
131+
output_result_file (str, optional): path to write results.
125132
nproc (int, optional): number of proc to use. Defaults to cpu_count().
126133
timeout (int, optional): allowance time for each exec, in seconds. Defaults to 60.
127134
whole_function_coverage (bool, optional): whether or not to test whole function coverage

0 commit comments

Comments
 (0)