Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Style gate: fails when any Python file under src/ is not Black-formatted.
name: "Black – code-style check"

# Only run when Python sources change.
on:
  pull_request:
    paths: ["**/*.py"]
  push:
    branches: [main]
    paths: ["**/*.py"]

jobs:
  black:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # ---------- pip wheel cache ----------
      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-black-pip-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-black-pip-

      # ---------- Black cache (formatting state) ----------
      # Black writes its cache to the per-user cache directory
      # (~/.cache/black on Linux runners) unless BLACK_CACHE_DIR is set, so
      # that is the path that has to be saved; a workspace-relative
      # ".cache/black" is never populated.
      # NOTE(review): Black appears to bypass its cache when --diff is
      # passed (as in the final step) — confirm; if so this step can go.
      - name: Cache Black .cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/black
          # The key is unique per commit so a fresh cache is saved every run;
          # restore-keys falls back to the most recent earlier cache.
          key: ${{ runner.os }}-black-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-black-

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      # Black is pinned so formatting results do not drift between runs.
      - name: Install Black (pinned)
        run: |
          python -m pip install --upgrade pip
          pip install black==24.10.0

      # --check sets a non-zero exit code on unformatted files;
      # --diff prints what Black would change.
      - name: Run Black in check mode
        run: black --check --diff src
47 changes: 47 additions & 0 deletions .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Type gate: runs mypy over src/ with the project's requirements installed.
name: "mypy – static type checks"

# Only run when Python sources change.
on:
  pull_request:
    paths: ["**/*.py"]
  push:
    branches: [main]
    paths: ["**/*.py"]

jobs:
  mypy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # ---------- pip wheel cache ----------
      # This job defines no strategy.matrix, so a matrix.python-version
      # expression would expand to an empty string. The interpreter version
      # is written literally instead and must be kept in sync with the
      # "Set up Python" step below.
      - name: Cache pip
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-py3.13-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-py3.13-pip-

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      # mypy and the stub packages are installed via requirements.txt.
      - name: Install deps + mypy
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # ---------- mypy incremental cache ----------
      # The key is unique per commit so a fresh cache is saved every run;
      # restore-keys falls back to the most recent earlier cache.
      - name: Cache mypy .mypy_cache
        uses: actions/cache@v4
        with:
          path: .mypy_cache
          key: ${{ runner.os }}-mypy-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-mypy-

      - name: Type‑check
        run: |
          mypy src
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ We are keeping implementing more agents and will open-source them very soon. Uti

## Installation

1. Create and activate a conda environment with Python 3.9.18:
1. Create and activate a conda environment with Python 3.13:

```sh
conda create -n repoaudit python=3.9.18
conda create -n repoaudit python=3.13
conda activate repoaudit
```

Expand Down
16 changes: 16 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Settings for the Black code formatter.
[tool.black]
line-length = 88 # Black's default line length, stated explicitly
target-version = ["py39"] # format for Python 3.9 compatibility
# NOTE(review): the README and CI workflows in this change set use Python 3.13 - confirm py39 is still the intended target.
skip-string-normalization = false
# Regex for files Black should format: names ending in .py or .pyi.
include = '\.pyi?$'

# Regex alternation of directory names that Black should skip.
exclude = '''
/(
\.git
| \.mypy_cache
| \.pytest_cache
| \.venv
| build
| dist
)/
'''
6 changes: 5 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@ streamlit
botocore
boto3
black
anthropic
anthropic
mypy
types-networkx
types-tqdm
boto3-stubs[essential]
Empty file removed src/__init__.py
Empty file.
112 changes: 66 additions & 46 deletions src/agent/dfbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@
class DFBScanAgent(Agent):
def __init__(
self,
bug_type,
is_reachable,
project_path,
language,
ts_analyzer,
model_name,
temperature,
call_depth,
max_neural_workers=30,
bug_type: str,
is_reachable: bool,
project_path: str,
language: str,
ts_analyzer: TSAnalyzer,
model_name: str,
temperature: float,
call_depth: int,
max_neural_workers: int = 30,
agent_id: int = 0,
) -> None:
self.bug_type = bug_type
Expand Down Expand Up @@ -112,7 +112,9 @@ def __obtain_extractor(self) -> DFBScanExtractor:
elif self.language == "Go":
if self.bug_type == "NPD":
return Go_NPD_Extractor(self.ts_analyzer)
return None
raise NotImplementedError(
f"Unsupported bug type: {self.bug_type} in {self.language}"
)

def __update_worklist(
self,
Expand Down Expand Up @@ -174,21 +176,23 @@ def __update_worklist(
if not is_CFL_reachable:
continue

for para in callee_function.paras:
if para.index == value.index:
delta_worklist.append(
(para, callee_function, new_call_context)
)
self.state.update_external_value_match(
(value, call_context), set({(para, new_call_context)})
)
if callee_function.paras is not None:
for para in callee_function.paras:
if para.index == value.index:
delta_worklist.append(
(para, callee_function, new_call_context)
)
self.state.update_external_value_match(
(value, call_context),
set({(para, new_call_context)}),
)

if value.label == ValueLabel.PARA:
# Consider side-effect.
# Example: the parameter *p is used in the function: p->f = null;
# We need to consider the side-effect of p.
caller_function = self.ts_analyzer.get_all_caller_functions(function)
for caller_function in caller_function:
caller_functions = self.ts_analyzer.get_all_caller_functions(function)
for caller_function in caller_functions:
new_call_context = copy.deepcopy(call_context)
top_unmatched_context_label = (
new_call_context.get_top_unmatched_context_label()
Expand Down Expand Up @@ -442,9 +446,13 @@ def start_scan_sequential(self) -> None:
ret.name,
ret.line_number - start_function.start_line_number + 1,
)
for ret in start_function.retvals
for ret in (
start_function.retvals
if start_function.retvals is not None
else []
)
]
input = IntraDataFlowAnalyzerInput(
df_input = IntraDataFlowAnalyzerInput(
start_function,
start_value,
sink_values,
Expand All @@ -453,20 +461,22 @@ def start_scan_sequential(self) -> None:
)

# Invoke the intra-procedural data-flow analysis
output = self.intra_dfa.invoke(input)
if output is None:
df_output = self.intra_dfa.invoke(
df_input, IntraDataFlowAnalyzerOutput
)
if df_output is None:
continue

for path_index in range(len(output.reachable_values)):
for path_index in range(len(df_output.reachable_values)):
reachable_values_in_single_path = set([])
for value in output.reachable_values[path_index]:
for value in df_output.reachable_values[path_index]:
reachable_values_in_single_path.add((value, call_context))
self.state.update_reachable_values_per_path(
(start_value, call_context), reachable_values_in_single_path
)

delta_worklist = self.__update_worklist(
input, output, call_context, path_index
df_input, df_output, call_context, path_index
)
worklist.extend(delta_worklist)

Expand All @@ -479,20 +489,22 @@ def start_scan_sequential(self) -> None:
continue

for buggy_path in self.state.potential_buggy_paths[src_value].values():
input = PathValidatorInput(
pv_input = PathValidatorInput(
self.bug_type,
buggy_path,
{
value: self.ts_analyzer.get_function_from_localvalue(value)
for value in buggy_path
},
)
output: PathValidatorOutput = self.path_validator.invoke(input)
pv_output = self.path_validator.invoke(
pv_input, PathValidatorOutput
)

if output is None:
if pv_output is None:
continue

if output.is_reachable:
if pv_output.is_reachable:
relevant_functions = {}
for value in buggy_path:
function = self.ts_analyzer.get_function_from_localvalue(
Expand All @@ -505,7 +517,7 @@ def start_scan_sequential(self) -> None:
self.bug_type,
src_value,
relevant_functions,
output.explanation_str,
pv_output.explanation_str,
)
self.state.update_bug_report(bug_report)

Expand Down Expand Up @@ -606,28 +618,30 @@ def __process_src_value(self, src_value: Value) -> None:

ret_values = [
(ret.name, ret.line_number - start_function.start_line_number + 1)
for ret in start_function.retvals
for ret in (
start_function.retvals if start_function.retvals is not None else []
)
]
input = IntraDataFlowAnalyzerInput(
df_input = IntraDataFlowAnalyzerInput(
start_function, start_value, sink_values, call_statements, ret_values
)

# Invoke the intra-procedural data-flow analysis
output = self.intra_dfa.invoke(input)
df_output = self.intra_dfa.invoke(df_input, IntraDataFlowAnalyzerOutput)

if output is None:
if df_output is None:
continue

for path_index in range(len(output.reachable_values)):
for path_index in range(len(df_output.reachable_values)):
reachable_values_in_single_path = set([])
for value in output.reachable_values[path_index]:
for value in df_output.reachable_values[path_index]:
reachable_values_in_single_path.add((value, call_context))
self.state.update_reachable_values_per_path(
(start_value, call_context), reachable_values_in_single_path
)

delta_worklist = self.__update_worklist(
input, output, call_context, path_index
df_input, df_output, call_context, path_index
)
worklist.extend(delta_worklist)

Expand All @@ -645,30 +659,36 @@ def __process_src_value(self, src_value: Value) -> None:
for value in buggy_path
}

relevant_functions = values_to_functions.values()
functions: Set[Function] = set()
for func in values_to_functions.values():
if func is not None:
functions.add(func)

if self.state.check_existence(src_value, relevant_functions):
if self.state.check_existence(src_value, functions):
continue

input = PathValidatorInput(
pv_input = PathValidatorInput(
self.bug_type,
buggy_path,
values_to_functions,
)
output: PathValidatorOutput = self.path_validator.invoke(input)
pv_output = self.path_validator.invoke(pv_input, PathValidatorOutput)

if output is None:
if pv_output is None:
continue

if output.is_reachable:
if pv_output.is_reachable:
relevant_functions = {}
for value in buggy_path:
function = self.ts_analyzer.get_function_from_localvalue(value)
if function is not None:
relevant_functions[function.function_id] = function

bug_report = BugReport(
self.bug_type, src_value, relevant_functions, output.explanation_str
self.bug_type,
src_value,
relevant_functions,
pv_output.explanation_str,
)
self.state.update_bug_report(bug_report)
bug_report_dict = {
Expand Down
Loading