|
9 | 9 | from __future__ import annotations |
10 | 10 |
|
11 | 11 | from abc import ABC, abstractmethod |
| 12 | +from pathlib import Path |
12 | 13 | from typing import TYPE_CHECKING |
13 | 14 |
|
14 | 15 | import git |
|
21 | 22 | from codesectools.utils import USER_CACHE_DIR |
22 | 23 |
|
23 | 24 | if TYPE_CHECKING: |
24 | | - from pathlib import Path |
25 | 25 | from typing import Self |
26 | 26 |
|
27 | 27 | from codesectools.sasts.core.parser import AnalysisResult, Defect |
@@ -132,6 +132,31 @@ def list_dataset_full_names(cls) -> list[str]: |
132 | 132 | return sorted([f"{cls.name}_{lang}" for lang in cls.supported_languages]) |
133 | 133 |
|
134 | 134 |
|
class PrebuiltDatasetMixin:
    """Provide functionality for datasets that require a build step.

    Meant to be combined with a dataset class that exposes a ``name``
    attribute: build artefacts are looked up under
    ``USER_CACHE_DIR / name / <prebuilt directory>``.

    Attributes:
        build_command: Presumably the command used to build the dataset
            (confirm against the concrete datasets). A falsy value makes
            ``is_built`` report ``False``.
        prebuilt_expected: Pair of (directory relative to the dataset's cache
            directory, glob pattern matching the expected build artefacts).

    """

    build_command: str
    prebuilt_expected: tuple[Path, str]

    def _prebuilt_path(self) -> Path:
        """Return the directory expected to hold the pre-built artefacts."""
        prebuilt_dir, _ = self.prebuilt_expected
        return USER_CACHE_DIR / self.name / prebuilt_dir

    def is_built(self) -> bool:
        """Check if the dataset has been built.

        Returns:
            ``True`` only when a build command is set, the pre-built
            directory exists and at least one artefact matches the expected
            glob pattern; ``False`` otherwise.

        """
        if not self.build_command:
            return False
        if not self._prebuilt_path().is_dir():
            return False
        # Go through the public method so subclasses overriding the listing
        # are honoured here as well.
        return bool(self.list_prebuilt_files())

    def list_prebuilt_files(self) -> list[Path]:
        """List the pre-built artefact files.

        Returns:
            Paths under the pre-built directory that match the expected glob
            pattern, sorted so the order does not depend on how the
            filesystem enumerates the directory. Empty when nothing matches.

        """
        _, prebuilt_glob = self.prebuilt_expected
        return sorted(self._prebuilt_path().glob(prebuilt_glob))
| 158 | + |
| 159 | + |
135 | 160 | class DatasetUnit: |
136 | 161 | """Base class for a single unit within a dataset. |
137 | 162 |
|
@@ -271,7 +296,7 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData: |
271 | 296 | if not defect.cwe or defect.cwe.id == -1: |
272 | 297 | continue |
273 | 298 |
|
274 | | - file_cwe_pair = (defect.file_path, defect.cwe) |
| 299 | + file_cwe_pair = (Path(defect.file).name, defect.cwe) # TODO: USE FULL PATH |
275 | 300 | if file_cwe_pair not in unique_reported_defects: |
276 | 301 | unique_reported_defects[file_cwe_pair] = defect |
277 | 302 |
|
@@ -329,6 +354,12 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData: |
329 | 354 | ) |
330 | 355 |
|
331 | 356 |
|
class PrebuiltFileDataset(PrebuiltDatasetMixin, FileDataset):
    """File-based dataset that must be built before it can be used.

    Combines ``PrebuiltDatasetMixin`` with ``FileDataset`` and defines no
    members of its own.
    """
| 361 | + |
| 362 | + |
332 | 363 | class FileDatasetData(BenchmarkData): |
333 | 364 | """Store the results of validating an analysis against a FileDataset. |
334 | 365 |
|
|
0 commit comments