OPPIDA · nolliv22 · Nov 6, 2025 · Nov 4, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/codesectools/datasets/BenchmarkJava/dataset.py b/codesectools/datasets/BenchmarkJava/dataset.py
@@ -29,7 +29,7 @@ class TestCode(File):
 
     def __init__(
         self,
-        filename: str,
+        filepath: Path,
         content: str | bytes,
         cwes: list[CWE],
         vuln_type: str,
@@ -38,15 +38,15 @@ def __init__(
         """Initialize a TestCode instance.
 
         Args:
-            filename: The name of the file.
+            filepath: The path to the file.
             content: The content of the file, as a string or bytes.
             cwes: A list of CWEs associated with the file.
             vuln_type: The type of vulnerability.
             has_vuln: A boolean indicating if the vulnerability is real or a false positive test case.
 
         """
         super().__init__(
-            filename=filename, content=content, cwes=cwes, has_vuln=has_vuln
+            filepath=filepath, content=content, cwes=cwes, has_vuln=has_vuln
         )
 
         self.vuln_type = vuln_type
@@ -148,10 +148,19 @@ def load_dataset(self) -> list[TestCode]:
         next(reader)
         for row in reader:
             filename = f"{row[0]}.java"
-            content = (testcode_dir / filename).read_text()
+            filepath = testcode_dir / filename
+            content = filepath.read_text()
             cwes = [CWEs.from_id(int(row[3]))]
             vuln_type = row[1]
             has_vuln = True if row[2] == "true" else False
-            files.append(TestCode(filename, content, cwes, vuln_type, has_vuln))
+            files.append(
+                TestCode(
+                    filepath.relative_to(self.directory),
+                    content,
+                    cwes,
+                    vuln_type,
+                    has_vuln,
+                )
+            )
 
         return files
diff --git a/codesectools/datasets/core/dataset.py b/codesectools/datasets/core/dataset.py
@@ -9,6 +9,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 import git
@@ -21,7 +22,6 @@
 from codesectools.utils import USER_CACHE_DIR
 
 if TYPE_CHECKING:
-    from pathlib import Path
     from typing import Self
 
     from codesectools.sasts.core.parser import AnalysisResult, Defect
@@ -197,7 +197,7 @@ class File(DatasetUnit):
     """Represent a single file in a dataset.
 
     Attributes:
-        filename (str): The name of the file.
+        filepath (Path): The relative path to the file.
         content (bytes): The byte content of the file.
         cwes (list[CWE]): A list of CWEs associated with the file.
         has_vuln (bool): True if the vulnerability is real, False if it's
@@ -206,20 +206,21 @@ class File(DatasetUnit):
     """
 
     def __init__(
-        self, filename: str, content: str | bytes, cwes: list[CWE], has_vuln: bool
+        self, filepath: Path, content: str | bytes, cwes: list[CWE], has_vuln: bool
     ) -> None:
         """Initialize a File instance.
 
         Args:
-            filename: The name of the file.
+            filepath: The relative path of the file.
             content: The content of the file, as a string or bytes. It will be
                 converted to bytes if provided as a string.
             cwes: A list of CWEs associated with the file.
             has_vuln: True if the vulnerability is real, False if it's
                 intended to be a false positive test case.
 
         """
-        self.filename = filename
+        self.filepath = filepath
+        self.filename = self.filepath.name
         self.content = content
         self.cwes = cwes
         self.has_vuln = has_vuln
@@ -231,29 +232,29 @@ def __repr__(self) -> str:
         """Return a developer-friendly string representation of the File.
 
         Returns:
-            A string showing the class name, filename, and CWE IDs.
+            A string showing the class name, filepath, and CWE IDs.
 
         """
         return f"""{self.__class__.__name__}(
-    filename: \t{self.filename}
+    filepath: \t{self.filepath}
     cwes: \t{self.cwes}
 )"""
 
-    def __eq__(self, other: str | Self) -> bool:
-        """Compare this File with another object for equality based on filename.
+    def __eq__(self, other: str | Path | Self) -> bool:
+        """Compare this File with another object for equality based on filepath.
 
         Args:
-            other: The object to compare with. Can be a string (filename) or
+            other: The object to compare with. Can be a string/Path (filepath) or
                    another File instance.
 
         Returns:
-            True if the filenames are equal, False otherwise.
+            True if the filepaths are equal, False otherwise.
 
         """
-        if isinstance(other, str):
-            return self.filename == other
+        if isinstance(other, (str, Path)):
+            return self.filepath == Path(other)
         elif isinstance(other, self.__class__):
-            return self.filename == other.filename
+            return self.filepath == other.filepath
         else:
             return False
 
@@ -264,7 +265,9 @@ def save(self, dir: Path) -> None:
             dir: The path to the directory where the file should be saved.
 
         """
-        (dir / self.filename).write_bytes(self.content)
+        target_path = dir / self.filepath
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+        target_path.write_bytes(self.content)
 
 
 class FileDataset(Dataset):
@@ -303,7 +306,7 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
         """
         # 1. Prepare ground truth from all files in the dataset
         ground_truth: dict[str, tuple[bool, set[CWE]]] = {
-            file.filename: (file.has_vuln, set(file.cwes)) for file in self.files
+            str(file.filepath): (file.has_vuln, set(file.cwes)) for file in self.files
         }
 
         # 2. Process reported defects to get unique (file, cwe) pairs
@@ -313,32 +316,32 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
             if not defect.cwe or defect.cwe.id == -1:
                 continue
 
-            file_cwe_pair = (defect.filename, defect.cwe)
+            file_cwe_pair = (str(defect.filepath), defect.cwe)
             if file_cwe_pair not in unique_reported_defects:
                 unique_reported_defects[file_cwe_pair] = defect
 
         # 3. Classify unique reported vulnerabilities as TP or FP
         tp_defects_map: dict[tuple[str, CWE], Defect] = {}
         fp_defects_map: dict[tuple[str, CWE], Defect] = {}
 
-        for (filename, cwe), defect in unique_reported_defects.items():
-            has_vuln, expected_cwes = ground_truth.get(filename, (False, set()))
+        for (filepath, cwe), defect in unique_reported_defects.items():
+            has_vuln, expected_cwes = ground_truth.get(filepath, (False, set()))
 
             if has_vuln and cwe in expected_cwes:
                 # Correctly identified a vulnerability
-                tp_defects_map[(filename, cwe)] = defect
+                tp_defects_map[(filepath, cwe)] = defect
             else:
                 # Reported a vuln in a non-vulnerable file, with wrong CWE,
                 # or in a file not part of the dataset.
-                fp_defects_map[(filename, cwe)] = defect
+                fp_defects_map[(filepath, cwe)] = defect
 
         # 4. Determine False Negatives by finding what was missed from the ground truth.
         fn_defects_set: set[tuple[str, CWE]] = set()
-        for filename, (has_vuln, expected_cwes) in ground_truth.items():
+        for filepath, (has_vuln, expected_cwes) in ground_truth.items():
             if has_vuln:
                 for expected_cwe in expected_cwes:
-                    if (filename, expected_cwe) not in tp_defects_map:
-                        fn_defects_set.add((filename, expected_cwe))
+                    if (filepath, expected_cwe) not in tp_defects_map:
+                        fn_defects_set.add((filepath, expected_cwe))
 
         # 5. Convert maps and sets to lists of objects for downstream use
         tp_defects = list(tp_defects_map.values())
@@ -354,7 +357,7 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
         fp_cwes = [cwe for _, cwe in fp_defects_map.keys()]
         fn_cwes = [cwe for _, cwe in fn_defects_set]
 
-        unique_correct_number = len({filename for filename, _ in tp_defects_map.keys()})
+        unique_correct_number = len({filepath for filepath, _ in tp_defects_map.keys()})
 
         return FileDatasetData(
             dataset=self,

diff --git a/codesectools/sasts/all/cli.py b/codesectools/sasts/all/cli.py
@@ -119,7 +119,13 @@ def benchmark(
         dataset: Annotated[
             str,
             typer.Argument(
-                click_type=Choice([d.name for d in all_sast.sasts_by_dataset]),
+                click_type=Choice(
+                    [
+                        f"{d.name}_{lang}"
+                        for d in all_sast.sasts_by_dataset
+                        for lang in d.supported_languages
+                    ]
+                ),
                 metavar="DATASET",
             ),
         ],
@@ -140,7 +146,7 @@ def benchmark(
     ) -> None:
         """Run a benchmark on a dataset using all available SAST tools."""
         dataset_name, lang = dataset.split("_")
-        for sast in all_sast.sasts_by_dataset.get(lang, []):
+        for sast in all_sast.sasts_by_dataset.get(DATASETS_ALL[dataset_name], []):
             dataset = DATASETS_ALL[dataset_name](lang)
             if isinstance(dataset, FileDataset):
                 sast.analyze_files(dataset, overwrite, testing)

diff --git a/codesectools/sasts/all/graphics.py b/codesectools/sasts/all/graphics.py
@@ -2,7 +2,6 @@
 
 import shutil
 import tempfile
-from pathlib import Path
 
 import matplotlib
 import matplotlib.pyplot as plt
@@ -11,6 +10,7 @@
 from rich import print
 
 from codesectools.sasts.all.sast import AllSAST
+from codesectools.utils import shorten_path
 
 ## Matplotlib config
 matplotlib.rcParams.update(
@@ -107,7 +107,7 @@ def __init__(self, project_name: str) -> None:
     def plot_overview(self) -> Figure:
         """Generate an overview plot with stats by files, SAST tools, and categories."""
         fig, (ax1, ax2, ax3) = plt.subplots(1, 3, layout="constrained")
-        by_files = {Path(k).name: v for k, v in self.result.stats_by_files().items()}
+        by_files = self.result.stats_by_files()
         by_sasts = self.result.stats_by_sasts()
         by_categories = self.result.stats_by_categories()
 
@@ -117,7 +117,7 @@ def plot_overview(self) -> Figure:
             list(by_files.items()), key=lambda e: e[1]["count"], reverse=True
         )
         for k, v in sorted_files[: self.limit]:
-            X_files.append(k)
+            X_files.append(shorten_path(k))
             Y_files.append(v["count"])
 
             COLORS_COUNT = {v: 0 for k, v in self.color_mapping.items()}
@@ -130,11 +130,11 @@ def plot_overview(self) -> Figure:
             current_height = 0
             for color, height in COLORS_COUNT.items():
                 if height > 0:
-                    bars.append((k, current_height + height, color))
+                    bars.append((shorten_path(k), current_height + height, color))
                     current_height += height
 
-            for k, height, color in bars[::-1]:
-                ax1.bar(k, height, color=color)
+            for k_short, height, color in bars[::-1]:
+                ax1.bar(k_short, height, color=color)
 
         ax1.set_xticks(X_files, X_files, rotation=45, ha="right")
         ax1.set_title(f"Stats by files (limit to {self.limit})")
@@ -231,7 +231,7 @@ def plot_top_cwes(self) -> Figure:
     def plot_top_scores(self) -> Figure:
         """Generate a stacked bar plot for files with the highest scores."""
         fig, ax = plt.subplots(1, 1, layout="constrained")
-        by_scores = {Path(k).name: v for k, v in self.result.stats_by_scores().items()}
+        by_scores = self.result.stats_by_scores()
 
         for file, data in by_scores.items():
             by_scores[file]["total_score"] = sum(data["score"].values())
@@ -244,7 +244,7 @@ def plot_top_scores(self) -> Figure:
 
         X_files, score_data = [], []
         for file, data in sorted_files[: self.limit]:
-            X_files.append(file)
+            X_files.append(shorten_path(file))
             score_data.append(data["score"])
 
         score_keys = score_data[0].keys()

diff --git a/codesectools/sasts/core/graphics.py b/codesectools/sasts/core/graphics.py
@@ -7,7 +7,6 @@
 
 import shutil
 import tempfile
-from pathlib import Path
 
 import matplotlib
 import matplotlib.pyplot as plt
@@ -19,6 +18,7 @@
 from codesectools.datasets.core.dataset import FileDataset, GitRepoDataset
 from codesectools.sasts.core.sast import SAST
 from codesectools.shared.cwe import CWE
+from codesectools.utils import shorten_path
 
 ## Matplotlib config
 matplotlib.rcParams.update(
@@ -153,7 +153,7 @@ def plot_overview(self) -> Figure:
         project_name = self.result.name
 
         fig, (ax1, ax2, ax3) = plt.subplots(1, 3, layout="constrained")
-        by_files = {Path(k).name: v for k, v in self.result.stats_by_files().items()}
+        by_files = self.result.stats_by_files()
         by_checkers = self.result.stats_by_checkers()
         by_categories = self.result.stats_by_categories()
 
@@ -163,7 +163,7 @@ def plot_overview(self) -> Figure:
             list(by_files.items()), key=lambda e: e[1]["count"], reverse=True
         )
         for k, v in sorted_files[: self.limit]:
-            X_files.append(k)
+            X_files.append(shorten_path(k))
             Y_files.append(v["count"])
 
             COLORS_COUNT = {v: 0 for k, v in self.color_mapping.items()}
@@ -177,11 +177,11 @@ def plot_overview(self) -> Figure:
             current_height = 0
             for color, height in COLORS_COUNT.items():
                 if height > 0:
-                    bars.append((k, current_height + height, color))
+                    bars.append((shorten_path(k), current_height + height, color))
                     current_height += height
 
-            for k, height, color in bars[::-1]:
-                ax1.bar(k, height, color=color)
+            for k_short, height, color in bars[::-1]:
+                ax1.bar(k_short, height, color=color)
 
         ax1.set_xticks(X_files, X_files, rotation=45, ha="right")
         ax1.set_title(f"Stats by files (limit to {self.limit})")

diff --git a/codesectools/sasts/core/parser.py b/codesectools/sasts/core/parser.py
@@ -72,7 +72,7 @@ def __repr__(self) -> str:
 
         """
         return f"""{self.__class__.__name__}(
-    file: \t{self.file}
+    filepath: \t{self.filepath}
     checker: \t{self.checker}
     category: \t{self.category}
     cwe: \t{self.cwe}
@@ -127,6 +127,17 @@ def __init__(
         self.loc = loc
         self.data = data
 
+        # Make absolute defect filepaths relative to the source path where possible
+        for defect in self.defects:
+            if defect.filepath.is_absolute():
+                try:
+                    defect.filepath = defect.filepath.relative_to(self.source_path)
+                    defect.filepath_str = str(defect.filepath)
+                except ValueError:
+                    # This can happen if the path is outside the source_path tree.
+                    # We leave it as is, but it will likely not match during validation.
+                    pass
+
     def __repr__(self) -> str:
         """Return a developer-friendly string representation of the AnalysisResult.