Skip to content

Commit bfaf79c

Browse files
authored
Merge pull request #1 from OPPIDA/spotbugs
2 parents e795578 + a9e6e31 commit bfaf79c

39 files changed

Lines changed: 1399 additions & 752 deletions

.pre-commit-config.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
default_install_hook_types:
2+
- pre-commit
3+
- commit-msg
4+
15
fail_fast: true
6+
27
repos:
38
- repo: https://github.com/astral-sh/uv-pre-commit
49
rev: 0.8.9
@@ -15,4 +20,11 @@ repos:
1520
hooks:
1621
- id: ruff-check
1722
args: [--fix]
18-
- id: ruff-format
23+
- id: ruff-format
24+
25+
- repo: https://github.com/compilerla/conventional-pre-commit
26+
rev: v4.3.0
27+
hooks:
28+
- id: conventional-pre-commit
29+
stages: [commit-msg]
30+
args: []

codesectools/cli.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,24 +68,28 @@ def status(
6868
if sasts or (not sasts and not datasets):
6969
table = Table(show_lines=True)
7070
table.add_column("SAST", justify="center", no_wrap=True)
71+
table.add_column("Type", justify="center", no_wrap=True)
7172
table.add_column("Status", justify="center", no_wrap=True)
7273
table.add_column("Note", justify="center")
7374
for sast_name, sast_data in SASTS_ALL.items():
7475
if sast_data["status"] == "full":
7576
table.add_row(
7677
sast_name,
78+
sast_data["sast"].__bases__[0].__name__,
7779
"Full ✅",
7880
"[b]Analysis[/b] and [b]result parsing[/b] are available",
7981
)
8082
elif sast_data["status"] == "partial":
8183
table.add_row(
8284
sast_name,
85+
sast_data["sast"].__bases__[0].__name__,
8386
"Partial ⚠️",
8487
f"Only [b]result parsing[/b] is available\nMissing: [red]{sast_data['missing']}[/red]",
8588
)
8689
else:
8790
table.add_row(
8891
sast_name,
92+
sast_data["sast"].__bases__[0].__name__,
8993
"None ❌",
9094
f"[b]Nothing[/b] is available\nMissing: [red]{sast_data['missing']}[/red]",
9195
)
@@ -143,18 +147,24 @@ def get_downloadable() -> dict[str, DownloadableRequirement | Dataset]:
143147

144148

145149
if DOWNLOADABLE := get_downloadable():
146-
147-
@cli.command()
148-
def download(
149-
name: Annotated[
150-
str,
151-
typer.Argument(
152-
click_type=Choice(["all"] + list(DOWNLOADABLE)),
153-
metavar="NAME",
154-
),
155-
],
156-
) -> None:
157-
"""Download missing resources."""
150+
download_hidden = False
151+
download_arg_type = str
152+
download_arg_value = typer.Argument(
153+
click_type=Choice(["all"] + list(DOWNLOADABLE)),
154+
metavar="NAME",
155+
)
156+
else:
157+
download_hidden = True
158+
download_arg_type = Optional[str]
159+
download_arg_value = None
160+
161+
162+
@cli.command(hidden=download_hidden)
163+
def download(name: download_arg_type = download_arg_value) -> None:
164+
"""Download any missing resources that are available for download."""
165+
if name is None:
166+
print("All downloadable resources have been retrieved.")
167+
else:
158168
if name == "all":
159169
targets = DOWNLOADABLE.values()
160170
else:

codesectools/datasets/BenchmarkJava/dataset.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
"""
77

88
import csv
9+
from pathlib import Path
910
from typing import Self
1011

1112
import git
1213

13-
from codesectools.datasets.core.dataset import File, FileDataset
14+
from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
1415
from codesectools.shared.cwe import CWE, CWEs
1516

1617

@@ -50,7 +51,7 @@ def __init__(
5051
self.vuln_type = vuln_type
5152

5253

53-
class BenchmarkJava(FileDataset):
54+
class BenchmarkJava(PrebuiltFileDataset):
5455
"""Represents the BenchmarkJava dataset.
5556
5657
This class handles the loading of the dataset, which includes Java test files
@@ -67,6 +68,9 @@ class BenchmarkJava(FileDataset):
6768
license = "GPL-2.0"
6869
license_url = "https://github.com/OWASP-Benchmark/BenchmarkJava/blob/master/LICENSE"
6970

71+
build_command = "mvn clean compile"
72+
prebuilt_expected = (Path("target/classes/org/owasp/benchmark/testcode"), "*.class")
73+
7074
def __init__(self, lang: None | str = None) -> None:
7175
"""Initialize the BenchmarkJava dataset.
7276
@@ -97,21 +101,8 @@ def __eq__(self, other: str | Self) -> bool:
97101

98102
def download_files(self: Self) -> None:
99103
"""Download the dataset files from the official Git repository."""
100-
repo = git.Repo.clone_from(
101-
"https://github.com/OWASP-Benchmark/BenchmarkJava.git",
102-
self.directory,
103-
depth=1,
104-
sparse=True,
105-
filter=["tree:0"],
106-
)
107-
repo.git.sparse_checkout(
108-
"set",
109-
"--no-cone",
110-
*[
111-
"src/main/java/org/owasp/benchmark/testcode/",
112-
"expectedresults-1.2.csv",
113-
"LICENSE",
114-
],
104+
git.Repo.clone_from(
105+
"https://github.com/OWASP-Benchmark/BenchmarkJava.git", self.directory
115106
)
116107

117108
def load_dataset(self) -> list[TestCode]:

codesectools/datasets/core/dataset.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from __future__ import annotations
1010

1111
from abc import ABC, abstractmethod
12+
from pathlib import Path
1213
from typing import TYPE_CHECKING
1314

1415
import git
@@ -21,7 +22,6 @@
2122
from codesectools.utils import USER_CACHE_DIR
2223

2324
if TYPE_CHECKING:
24-
from pathlib import Path
2525
from typing import Self
2626

2727
from codesectools.sasts.core.parser import AnalysisResult, Defect
@@ -132,6 +132,31 @@ def list_dataset_full_names(cls) -> list[str]:
132132
return sorted([f"{cls.name}_{lang}" for lang in cls.supported_languages])
133133

134134

135+
class PrebuiltDatasetMixin:
    """Provide functionality for datasets that require a build step.

    Classes using this mixin supply ``build_command`` and
    ``prebuilt_expected`` and must expose a ``name`` attribute: the
    artefacts are looked up under ``USER_CACHE_DIR / name``.
    """

    # Build command for the dataset. It is not executed by this mixin; a
    # falsy value makes ``is_built`` report ``False`` unconditionally.
    build_command: str
    # (directory relative to the dataset's cache directory, glob pattern)
    # describing where the built artefacts are expected to be found.
    prebuilt_expected: tuple[Path, str]

    def _prebuilt_path(self) -> Path:
        """Return the directory in which built artefacts are looked up."""
        prebuilt_dir, _ = self.prebuilt_expected
        return USER_CACHE_DIR / self.name / prebuilt_dir

    def is_built(self) -> bool:
        """Check if the dataset has been built.

        Returns:
            ``True`` only when a build command is set, the expected artefact
            directory exists, and at least one path in it matches the
            expected glob pattern.
        """
        if not self.build_command:
            return False
        if not self._prebuilt_path().is_dir():
            return False
        return bool(self.list_prebuilt_files())

    def list_prebuilt_files(self) -> list[Path]:
        """List the pre-built artefact files.

        Returns:
            The paths under the artefact directory that match the glob
            pattern from ``prebuilt_expected``.
        """
        _, prebuilt_glob = self.prebuilt_expected
        return list(self._prebuilt_path().glob(prebuilt_glob))
158+
159+
135160
class DatasetUnit:
136161
"""Base class for a single unit within a dataset.
137162
@@ -271,7 +296,7 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
271296
if not defect.cwe or defect.cwe.id == -1:
272297
continue
273298

274-
file_cwe_pair = (defect.file_path, defect.cwe)
299+
file_cwe_pair = (Path(defect.file).name, defect.cwe) # TODO: USE FULL PATH
275300
if file_cwe_pair not in unique_reported_defects:
276301
unique_reported_defects[file_cwe_pair] = defect
277302

@@ -329,6 +354,12 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
329354
)
330355

331356

357+
class PrebuiltFileDataset(PrebuiltDatasetMixin, FileDataset):
    """Represent a file-based dataset that requires a build step.

    Combines ``PrebuiltDatasetMixin`` with ``FileDataset`` and adds no
    members of its own.
    """
361+
362+
332363
class FileDatasetData(BenchmarkData):
333364
"""Store the results of validating an analysis against a FileDataset.
334365

codesectools/sasts/__init__.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,32 @@
2222
from codesectools.utils import SASTS_DIR
2323

2424
SASTS_ALL = {}
25-
for child in SASTS_DIR.iterdir():
25+
for child in (SASTS_DIR / "tools").iterdir():
2626
if child.is_dir():
27-
if list(child.glob("sast.py")) and child.name not in ["all", "core"]:
28-
sast_name = child.name
29-
30-
sast_module = importlib.import_module(
31-
f"codesectools.sasts.{sast_name}.sast"
32-
)
33-
34-
sast: SAST = getattr(sast_module, f"{sast_name}SAST")
35-
sast_instance = sast()
36-
analysis_result: AnalysisResult = getattr(
37-
sast_module, f"{sast_name}AnalysisResult"
38-
)
39-
40-
cli_module = importlib.import_module(f"codesectools.sasts.{sast_name}.cli")
41-
cli_factory: typer.Typer = getattr(cli_module, f"{sast_name}CLIFactory")
42-
43-
SASTS_ALL[sast_name] = {
44-
"status": sast_instance.status,
45-
"missing": sast_instance.missing,
46-
"properties": sast_instance.properties,
47-
"sast": sast,
48-
"analysis_result": analysis_result,
49-
"cli_factory": cli_factory,
50-
}
27+
sast_name = child.name
28+
29+
sast_module = importlib.import_module(
30+
f"codesectools.sasts.tools.{sast_name}.sast"
31+
)
32+
33+
sast: SAST = getattr(sast_module, f"{sast_name}SAST")
34+
sast_instance = sast()
35+
analysis_result: AnalysisResult = getattr(
36+
sast_module, f"{sast_name}AnalysisResult"
37+
)
38+
39+
cli_module = importlib.import_module(
40+
f"codesectools.sasts.tools.{sast_name}.cli"
41+
)
42+
cli_factory: typer.Typer = getattr(cli_module, f"{sast_name}CLIFactory")
43+
44+
SASTS_ALL[sast_name] = {
45+
"status": sast_instance.status,
46+
"missing": sast_instance.missing,
47+
"properties": sast_instance.properties,
48+
"sast": sast,
49+
"analysis_result": analysis_result,
50+
"cli_factory": cli_factory,
51+
}
5152

5253
SASTS_ALL = dict(sorted(SASTS_ALL.items()))

codesectools/sasts/all/cli.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from codesectools.sasts import SASTS_ALL
2121
from codesectools.sasts.all.graphics import ProjectGraphics
2222
from codesectools.sasts.all.sast import AllSAST
23+
from codesectools.sasts.core.sast import PrebuiltSAST
2324

2425

2526
def build_cli() -> typer.Typer:
@@ -70,6 +71,15 @@ def analyze(
7071
metavar="LANG",
7172
),
7273
],
74+
# Additional options
75+
artifact_dir: Annotated[
76+
Path | None,
77+
typer.Option(
78+
help="Pre-built artifacts directory (for PrebuiltSAST only)",
79+
metavar="ARTIFACT_DIR",
80+
),
81+
] = None,
82+
# Common NOT REQUIRED option
7383
overwrite: Annotated[
7484
bool,
7585
typer.Option(
@@ -80,16 +90,27 @@ def analyze(
8090
) -> None:
8191
"""Run analysis on the current project with all available SASTs."""
8292
for sast in all_sast.sasts:
93+
if isinstance(sast, PrebuiltSAST) and artifact_dir is None:
94+
print(f"{sast.name} required pre-built artifacts for analysis")
95+
print(
96+
"Please provide the directory with artifacts (with --artifact-dir) to include this tool"
97+
)
98+
continue
99+
83100
output_dir = sast.output_dir / Path.cwd().name
84101
if output_dir.is_dir():
85102
if overwrite:
86103
shutil.rmtree(output_dir)
87-
sast.run_analysis(lang, Path.cwd(), output_dir)
104+
sast.run_analysis(
105+
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
106+
)
88107
else:
89108
print(f"Found existing analysis result at {output_dir}")
90109
print("Use --overwrite to overwrite it")
91110
else:
92-
sast.run_analysis(lang, Path.cwd(), output_dir)
111+
sast.run_analysis(
112+
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
113+
)
93114

94115
@cli.command(help="Benchmark a dataset using all SASTs.")
95116
def benchmark(

codesectools/sasts/all/parser.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ def stats_by_scores(self) -> dict:
149149

150150
defect_locations = {}
151151
for defect in defects:
152-
if location := defect.location:
153-
start, end = location
152+
if any(defect.location):
153+
start, end = defect.location
154154
for line in range(start, end + 1):
155155
if not defect_locations.get(line):
156156
defect_locations[line] = []
@@ -202,11 +202,12 @@ def prepare_report_data(self) -> dict:
202202

203203
locations = []
204204
for defect in defects:
205-
start, end = defect.location
206-
if start and end:
207-
locations.append(
208-
(defect.sast, defect.cwe, defect.message, (start, end))
209-
)
205+
if any(defect.location):
206+
start, end = defect.location
207+
if start and end:
208+
locations.append(
209+
(defect.sast, defect.cwe, defect.message, (start, end))
210+
)
210211

211212
report["defects"][defect_file] = {
212213
"score": scores[defect_file]["score"],

0 commit comments

Comments
 (0)