Commit 27bde60

fix: remove computed unique_id_from_tool from IriusRisk parser

Per PR review feedback, parsers must not compute unique_id_from_tool. Removed the SHA-256 hash generation and its related tests; deduplication now relies on DefectDojo's default hashcode algorithm. Updated the docs to reflect the change.

Authored by T. Walker - DefectDojo

1 parent adfdb99 commit 27bde60

3 files changed: 2 additions & 28 deletions

docs/content/supported_tools/parsers/file/iriusrisk.md

Lines changed: 2 additions & 6 deletions

```diff
@@ -25,8 +25,6 @@ By default, DefectDojo identifies duplicate Findings using these [hashcode field
 - file_path
 - description
 
-The parser also populates `unique_id_from_tool` with a SHA-256 hash of the Component, Threat, and Risk Response fields, providing an additional layer of deduplication across reimports.
-
 ### Sample Scan Data
 
 Sample IriusRisk scans can be found in the [sample scan data folder](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/iriusrisk).
@@ -69,7 +67,6 @@ Sample IriusRisk scans can be found in the [sample scan data folder](https://git
 | Risk Response | mitigation | 94 | Mitigation status percentages from IriusRisk |
 | MITRE reference | cwe | 82-85 | When value matches CWE-NNN pattern, integer is extracted to cwe field |
 | MITRE reference | references | 86-87 | When value does not match CWE pattern, stored as references |
-| Component + Threat + Risk Response | unique_id_from_tool | 74-77 | SHA-256 hash used for deduplication across reimports |
 
 </details>
 
@@ -83,7 +80,6 @@ Sample IriusRisk scans can be found in the [sample scan data folder](https://git
 | static_finding | False | 97 | Threat model data is neither static nor dynamic analysis |
 | dynamic_finding | False | 98 | Threat model data is neither static nor dynamic analysis |
 | active | True (False when "Very low") | 96 | Set to False when Current Risk is "Very low" (fully mitigated) |
-| unique_id_from_tool | SHA-256 hash | 99 | Hash of Component, Threat, and Risk Response |
 
 </details>
 
@@ -142,8 +138,8 @@ Findings are set to active by default (line 96). When the "Current Risk" value i
 
 ### Deduplication
 
-The parser generates a `unique_id_from_tool` by computing a SHA-256 hash of the Component, Threat, and Risk Response fields concatenated with pipe delimiters (lines 74-77). This ensures that each distinct combination of component, threat, and mitigation state produces a unique identifier. On reimport, findings with matching unique IDs are recognized as the same finding rather than being duplicated.
+Deduplication relies on DefectDojo's default hashcode algorithm, which uses the title, cwe, line, file_path, and description fields to identify duplicate findings.
 
 ### Duplicate Rows in Source Data
 
-IriusRisk CSV exports can contain multiple rows with the same Component and Threat but different Risk Response values. These represent distinct countermeasure paths for the same threat. Each row is imported as a separate finding, distinguished by its unique ID which incorporates the Risk Response field.
+IriusRisk CSV exports can contain multiple rows with the same Component and Threat but different Risk Response values. These represent distinct countermeasure paths for the same threat. Each row is imported as a separate finding, distinguished by its description content which incorporates all CSV fields.
```
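The docs now describe deduplication via DefectDojo's default hashcode fields (title, cwe, line, file_path, description). A minimal sketch of that idea follows; the `hashcode` function and the sample finding data are hypothetical illustrations, not DefectDojo's actual implementation (which lives in its models layer and differs in detail):

```python
import hashlib

def hashcode(fields):
    # Illustrative only: join the configured hashcode field values and
    # hash them, so identical field sets map to the same digest.
    joined = "|".join(str(v) for v in fields)
    return hashlib.sha256(joined.encode("utf-8")).hexdigest()

# Two findings with identical title, cwe, line, file_path, description
# (made-up sample data) collapse to the same hashcode, i.e. a duplicate.
finding_a = ["SQL injection in login", 89, 42, "app/db.py", "Unsanitized input"]
finding_b = list(finding_a)

print(hashcode(finding_a) == hashcode(finding_b))  # True
```

The point of the change is that this field-based matching happens centrally in DefectDojo, so individual parsers no longer need to compute their own identifiers.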

dojo/tools/iriusrisk/parser.py

Lines changed: 0 additions & 7 deletions

```diff
@@ -1,5 +1,4 @@
 import csv
-import hashlib
 import io
 import re
 
@@ -71,11 +70,6 @@ def get_findings(self, filename, test):
                 description_parts.append(f"**STRIDE-LM:** {stride_lm}")
             description = "\n".join(description_parts)
 
-            # Unique ID for deduplication across reimports
-            unique_id = hashlib.sha256(
-                f"{component}|{threat}|{risk_response}".encode(),
-            ).hexdigest()
-
             # Extract CWE from MITRE reference if present
             cwe = None
             references = ""
@@ -96,7 +90,6 @@ def get_findings(self, filename, test):
                 active=current_risk != "Very low",
                 static_finding=False,
                 dynamic_finding=False,
-                unique_id_from_tool=unique_id,
             )
             if cwe:
                 finding.cwe = cwe
```
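For reference, the deleted logic boiled down to the following standalone snippet, reconstructed from the removed lines above (the sample field values are made up):

```python
import hashlib

# Hypothetical values standing in for one parsed CSV row.
component = "Web Server"
threat = "SQL Injection"
risk_response = "Implemented"

# This is the computation the commit removes: a SHA-256 digest over the
# three pipe-delimited fields, formerly assigned to unique_id_from_tool.
unique_id = hashlib.sha256(
    f"{component}|{threat}|{risk_response}".encode(),
).hexdigest()

print(len(unique_id))  # 64: hex-digest length of SHA-256
```

Because the digest is deterministic, reimports of the same CSV produced stable IDs; after this change, stability instead comes from the hashcode fields DefectDojo computes itself.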
unittests/tools/test_iriusrisk_parser.py

Lines changed: 0 additions & 15 deletions

```diff
@@ -112,21 +112,6 @@ def test_finding_static_finding(self):
         self.assertFalse(findings[0].static_finding)
         self.assertFalse(findings[0].dynamic_finding)
 
-    def test_finding_unique_id_from_tool(self):
-        with (get_unit_tests_scans_path("iriusrisk") / "one_vuln.csv").open(encoding="utf-8") as testfile:
-            parser = IriusriskParser()
-            findings = parser.get_findings(testfile, Test())
-            self.assertIsNotNone(findings[0].unique_id_from_tool)
-            self.assertGreater(len(findings[0].unique_id_from_tool), 0)
-
-    def test_finding_unique_id_is_consistent(self):
-        """Parsing the same file twice should produce the same unique IDs."""
-        with (get_unit_tests_scans_path("iriusrisk") / "one_vuln.csv").open(encoding="utf-8") as testfile:
-            findings1 = IriusriskParser().get_findings(testfile, Test())
-        with (get_unit_tests_scans_path("iriusrisk") / "one_vuln.csv").open(encoding="utf-8") as testfile:
-            findings2 = IriusriskParser().get_findings(testfile, Test())
-        self.assertEqual(findings1[0].unique_id_from_tool, findings2[0].unique_id_from_tool)
-
     def test_finding_with_owner(self):
         with (get_unit_tests_scans_path("iriusrisk") / "many_vulns.csv").open(encoding="utf-8") as testfile:
             parser = IriusriskParser()
```