
Commit 0359bba

Authored by seantechco, claude, and valentijnscholten

fix: handle missing status_finding_non_special prefetch in reimporter (#14569)

* fix: handle missing status_finding_non_special prefetch in reimporter

  When a finding is created during reimport (no match found) and added to the
  candidate dictionaries for same-batch matching, it lacks the
  status_finding_non_special prefetch attribute that is only set by
  build_candidate_scope_queryset. If a subsequent finding in the same batch
  matches against this newly-created finding, accessing
  existing_finding.status_finding_non_special raises AttributeError.

  Add EndpointManager.get_non_special_endpoint_statuses() that returns the
  prefetched attribute when available, falling back to an equivalent DB query
  otherwise. Use it at both access sites: default_reimporter.py
  (process_matched_mitigated_finding) and endpoint_manager.py
  (update_endpoint_status).

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* add test case

* ruff

* remove mock

* Remove unused import from test_reimport_prefetch.py

---------

Co-authored-by: seantechco <admin@seantech.co>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Valentijn Scholten <valentijnscholten@gmail.com>
1 parent 0ecee01 commit 0359bba
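
The failure is a general property of Django's Prefetch(to_attr=...): the attribute only exists on instances loaded through the queryset that declares the prefetch. A toy reproduction of the bug and of the fix's fallback pattern (no Django required; the names mirror the real ones, but everything here is illustrative):

    # A finding loaded via build_candidate_scope_queryset gets the attribute...
    class Finding:
        pass

    prefetched = Finding()
    prefetched.status_finding_non_special = []  # what Prefetch(to_attr=...) effectively does

    # ...while a finding created mid-batch does not, so plain attribute access raises.
    created = Finding()

    def get_non_special_statuses(finding, db_fallback=lambda f: []):
        # The fix: prefer the prefetched attribute, fall back to a (stubbed) DB query.
        if hasattr(finding, "status_finding_non_special"):
            return finding.status_finding_non_special
        return db_fallback(finding)

    assert get_non_special_statuses(prefetched) == []
    assert get_non_special_statuses(created) == []  # previously: AttributeError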

4 files changed: 356 additions & 4 deletions

File tree:
- dojo/importers/default_reimporter.py
- dojo/importers/endpoint_manager.py
- unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json
- unittests/test_reimport_prefetch.py

dojo/importers/default_reimporter.py (6 additions, 2 deletions)
@@ -764,8 +764,12 @@ def process_matched_mitigated_finding(
         else:
             # TODO: Delete this after the move to Locations
             # Reactivate mitigated endpoints that are not false positives, out of scope, or risk accepted
-            # status_finding_non_special is prefetched by build_candidate_scope_queryset
-            self.endpoint_manager.chunk_endpoints_and_reactivate(existing_finding.status_finding_non_special)
+            # status_finding_non_special is prefetched by build_candidate_scope_queryset.
+            # Falls back to a DB query when the finding was not loaded through that queryset
+            # (e.g. a finding created during the same reimport batch).
+            self.endpoint_manager.chunk_endpoints_and_reactivate(
+                self.endpoint_manager.get_non_special_endpoint_statuses(existing_finding),
+            )
             existing_finding.notes.add(note)
             self.reactivated_items.append(existing_finding)
             # The new finding is active while the existing on is mitigated. The existing finding needs to

dojo/importers/endpoint_manager.py (24 additions, 2 deletions)
@@ -1,6 +1,7 @@
 import logging
 
 from django.core.exceptions import MultipleObjectsReturned, ValidationError
+from django.db.models.query_utils import Q
 from django.urls import reverse
 from django.utils import timezone
 
@@ -147,6 +148,25 @@ def chunk_endpoints_and_mitigate(
     ) -> None:
         dojo_dispatch_task(EndpointManager.mitigate_endpoint_status, endpoint_status_list, user, sync=True)
 
+    @staticmethod
+    def get_non_special_endpoint_statuses(
+        finding: Finding,
+    ) -> list[Endpoint_Status]:
+        """
+        Return the non-special (not false_positive, out_of_scope, or risk_accepted) endpoint
+        statuses for a finding. Uses the prefetched ``status_finding_non_special`` attribute
+        when available (set by ``build_candidate_scope_queryset`` with ``mode="reimport"``),
+        and falls back to a database query otherwise — e.g. when the finding was created
+        during the same reimport batch and therefore never went through the prefetch queryset.
+        """
+        if hasattr(finding, "status_finding_non_special"):
+            return finding.status_finding_non_special
+        return list(
+            finding.status_finding.exclude(
+                Q(false_positive=True) | Q(out_of_scope=True) | Q(risk_accepted=True),
+            ).select_related("endpoint"),
+        )
+
     def update_endpoint_status(
         self,
         existing_finding: Finding,
@@ -158,8 +178,10 @@ def update_endpoint_status(
         # New endpoints are already added in serializers.py / views.py (see comment "# for existing findings: make sure endpoints are present or created")
         # So we only need to mitigate endpoints that are no longer present
         # status_finding_non_special is prefetched by build_candidate_scope_queryset with the
-        # special-status exclusion and endpoint select_related already applied at the DB level
-        existing_finding_endpoint_status_list = existing_finding.status_finding_non_special
+        # special-status exclusion and endpoint select_related already applied at the DB level.
+        # Falls back to a DB query when the finding was not loaded through that queryset
+        # (e.g. a finding created during the same reimport batch).
+        existing_finding_endpoint_status_list = self.get_non_special_endpoint_statuses(existing_finding)
         new_finding_endpoints_list = new_finding.unsaved_endpoints
         if new_finding.is_mitigated:
             # New finding is mitigated, so mitigate all old endpoints
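
For reference, a minimal sketch of the kind of queryset that attaches the attribute the hasattr() check detects. Prefetch, Q, to_attr, and select_related are standard Django API; the filter and the exact queryset shape are assumptions, since build_candidate_scope_queryset itself is not part of this diff:

    from django.db.models import Prefetch, Q

    from dojo.models import Endpoint_Status, Finding

    # Prefetch with to_attr stores the prefetched rows as a plain list attribute,
    # so only instances loaded through this queryset carry it.
    candidates = Finding.objects.filter(test=test).prefetch_related(
        Prefetch(
            "status_finding",
            queryset=Endpoint_Status.objects.exclude(
                Q(false_positive=True) | Q(out_of_scope=True) | Q(risk_accepted=True),
            ).select_related("endpoint"),
            to_attr="status_finding_non_special",
        ),
    )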
unittests/scans/stackhawk/stackhawk_two_vul_same_hashcode_fabricated.json (new file, 77 additions)
{
  "service": "StackHawk",
  "scanCompleted": {
    "scan": {
      "id": "f3aa6752-8fef-58fa-c854-1d3g8e972f38",
      "hawkscanVersion": "2.1.1",
      "env": "Development",
      "status": "COMPLETED",
      "application": "Secured Application",
      "startedTimestamp": "2022-02-16T23:07:19.575Z",
      "scanURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38"
    },
    "scanDuration": "21",
    "spiderDuration": "45",
    "completedScanStats": {
      "urlsCount": "2",
      "duration": "66",
      "scanResultsStats": {
        "totalCount": "2",
        "lowCount": "0",
        "mediumCount": "0",
        "highCount": "2",
        "lowTriagedCount": "0",
        "mediumTriagedCount": "0",
        "highTriagedCount": "0"
      }
    },
    "findings": [
      {
        "pluginId": "90001",
        "pluginName": "Insecure JSF ViewState",
        "severity": "High",
        "host": "https://localhost:9000",
        "paths": [
          {
            "path": "/app/login",
            "method": "GET",
            "status": "NEW",
            "pathURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90001/path/1/message/1"
          }
        ],
        "pathStats": [
          {
            "status": "NEW",
            "count": 1
          }
        ],
        "totalCount": "1",
        "category": "Security Misconfiguration",
        "findingURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90001"
      },
      {
        "pluginId": "90002",
        "pluginName": "Insecure HTTP Header",
        "severity": "High",
        "host": "https://localhost:9000",
        "paths": [
          {
            "path": "/app/dashboard",
            "method": "GET",
            "status": "NEW",
            "pathURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90002/path/1/message/1"
          }
        ],
        "pathStats": [
          {
            "status": "NEW",
            "count": 1
          }
        ],
        "totalCount": "1",
        "category": "Security Misconfiguration",
        "findingURL": "https://app.stackhawk.com/scans/f3aa6752-8fef-58fa-c854-1d3g8e972f38/finding/90002"
      }
    ]
  }
}
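
Why these two entries collide under the test's override: the StackHawk parser keeps them separate because the pluginIds differ, but the overridden hash fields ignore pluginId (vuln_id_from_tool) and hash only the shared application/env metadata. A rough stand-in for the effect (the real computation is DefectDojo's Finding hash_code logic; this sketch only mimics the field selection):

    import hashlib

    # Flattened view of the two report entries above (illustrative field mapping).
    finding_1 = {"vuln_id_from_tool": "90001", "component_name": "Secured Application", "component_version": "Development"}
    finding_2 = {"vuln_id_from_tool": "90002", "component_name": "Secured Application", "component_version": "Development"}

    def fake_hash_code(finding, fields):
        # Crude stand-in: hash only the configured fields.
        return hashlib.sha256("|".join(finding[f] for f in fields).encode()).hexdigest()

    default_fields = ["vuln_id_from_tool", "component_name", "component_version"]
    overridden_fields = ["component_name", "component_version"]

    # With the defaults, the differing pluginIds keep the hashes distinct...
    assert fake_hash_code(finding_1, default_fields) != fake_hash_code(finding_2, default_fields)
    # ...with vuln_id_from_tool removed, both entries produce the same hash_code.
    assert fake_hash_code(finding_1, overridden_fields) == fake_hash_code(finding_2, overridden_fields)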
unittests/test_reimport_prefetch.py (new file, 249 additions)
"""
Regression tests for handling duplicate findings within the same reimport report.

When a scan report contains two findings that produce the same hash_code, the first
creates a new finding via process_finding_that_was_not_matched() and gets added to
the candidate dictionaries. When the second finding in the same batch matches against
this newly-created finding, accessing existing_finding.status_finding_non_special
raises AttributeError because the finding was never loaded through
build_candidate_scope_queryset (which sets up the Prefetch with to_attr).

Bugfix: https://github.com/DefectDojo/django-DefectDojo/pull/14569
Batch endpoint optimization (related): https://github.com/DefectDojo/django-DefectDojo/pull/14489
"""

from crum import impersonate
from django.conf import settings
from django.test import override_settings
from django.utils import timezone

from dojo.importers.default_reimporter import DefaultReImporter
from dojo.location.models import LocationFindingReference
from dojo.location.status import FindingLocationStatus
from dojo.models import (
    Development_Environment,
    Dojo_User,
    Endpoint,
    Endpoint_Status,
    Engagement,
    Finding,
    Product,
    Product_Type,
    Test,
    Test_Type,
    User,
    UserContactInfo,
)

from .dojo_test_case import DojoTestCase, get_unit_tests_scans_path, skip_unless_v2

SCAN_TYPE = "StackHawk HawkScan"

# Reimport: two findings with different pluginIds (so the parser keeps both as separate
# findings) but identical hash_code fields (component_name + component_version).
# Each finding has one unique endpoint/location path (/app/login and /app/dashboard).
# This triggers the same-batch matching scenario where the second finding matches
# against the first one that was just created in the same reimport batch.
REIMPORT_SCAN = get_unit_tests_scans_path("stackhawk") / "stackhawk_two_vul_same_hashcode_fabricated.json"


def _hashcode_fields_without_vuln_id():
    """
    Return a copy of HASHCODE_FIELDS_PER_SCANNER with StackHawk configured to
    hash only on component_name + component_version (excluding vuln_id_from_tool).

    By default StackHawk hashes on [vuln_id_from_tool, component_name, component_version].
    Since vuln_id_from_tool maps to pluginId and the parser deduplicates by pluginId,
    it is normally impossible for two separate findings to share a hash_code.

    By removing vuln_id_from_tool from the hash fields, two findings with different
    pluginIds but the same application/env metadata will produce the same hash_code,
    allowing us to exercise the same-batch matching code path.
    """
    fields = dict(settings.HASHCODE_FIELDS_PER_SCANNER)
    fields[SCAN_TYPE] = ["component_name", "component_version"]
    return fields


class ReimportDuplicateFindingsTestBase(DojoTestCase):

    """Shared setup for duplicate-findings-in-same-report tests."""

    def setUp(self):
        super().setUp()
        testuser, _ = User.objects.get_or_create(username="admin")
        UserContactInfo.objects.get_or_create(user=testuser, defaults={"block_execution": True})

        self.system_settings(enable_deduplication=True)
        self.system_settings(enable_product_grade=False)

        product_type, _ = Product_Type.objects.get_or_create(name="test")
        product, _ = Product.objects.get_or_create(
            name="ReimportPrefetchTest",
            description="Test",
            prod_type=product_type,
        )
        engagement, _ = Engagement.objects.get_or_create(
            name="Test Engagement",
            product=product,
            target_start=timezone.now(),
            target_end=timezone.now(),
        )
        self.lead, _ = User.objects.get_or_create(username="admin")
        environment, _ = Development_Environment.objects.get_or_create(name="Development")
        test_type, _ = Test_Type.objects.get_or_create(name=SCAN_TYPE)
        self.test = Test.objects.create(
            engagement=engagement,
            test_type=test_type,
            scan_type=SCAN_TYPE,
            target_start=timezone.now(),
            target_end=timezone.now(),
            environment=environment,
        )
        self.product = product

    def _reimport_with_overridden_hashcode(self):
        """Reimport the two-finding scan file with overridden hash settings."""
        hashcode_override = _hashcode_fields_without_vuln_id()
        with (
            impersonate(Dojo_User.objects.get(username="admin")),
            override_settings(HASHCODE_FIELDS_PER_SCANNER=hashcode_override),
            REIMPORT_SCAN.open(encoding="utf-8") as scan,
        ):
            reimporter = DefaultReImporter(
                test=self.test,
                user=self.lead,
                lead=self.lead,
                scan_date=None,
                minimum_severity="Info",
                active=True,
                verified=True,
                sync=True,
                scan_type=SCAN_TYPE,
            )
            return reimporter.process_scan(scan)


@skip_unless_v2
class TestReimportDuplicateFindingsEndpointHandling(ReimportDuplicateFindingsTestBase):

    """Regression test: reimport must handle endpoints correctly for duplicate findings in the same report."""

    def test_reimport_duplicate_findings_in_same_report_endpoints(self):
        """
        Reimporting a report with two findings that share the same hash_code must
        not raise AttributeError on status_finding_non_special, and must correctly
        create endpoints for both findings.

        The scan file has two findings with different pluginIds, each with one
        unique endpoint (/app/login and /app/dashboard). With the overridden hash
        settings, both produce the same hash_code. The first finding is created as
        new and added to candidates. The second finding matches against it (a
        batch-created finding), and its endpoint is added via finding_post_processing.
        """
        # This previously raised:
        # AttributeError: 'Finding' object has no attribute 'status_finding_non_special'
        _test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode()

        # The first finding is new (empty test, no candidates). The second finding
        # matches the first (same hash_code) so it is not counted as new.
        self.assertEqual(len_new, 1, "Reimport should create one new finding (second matches first)")
        self.assertEqual(len_closed, 0, "No findings should be closed")

        # Only one finding should exist — the second was matched to the first
        findings = Finding.objects.filter(test=self.test)
        self.assertEqual(findings.count(), 1, "Only one finding should exist after reimport")

        finding = findings.first()

        # The first finding (new, pluginId=90001) creates endpoint /app/login.
        # The second finding (pluginId=90002) matches the first via hash_code.
        # finding_post_processing runs for BOTH iterations, adding each finding's
        # endpoints to the (single) matched finding. So the finding ends up with
        # 2 endpoints: /app/login (from the first) and /app/dashboard (from the second).
        endpoints = Endpoint.objects.filter(product=self.product)
        self.assertEqual(endpoints.count(), 2, "Two endpoints should be created (one per finding in report)")

        endpoint_statuses = Endpoint_Status.objects.filter(finding=finding)
        self.assertEqual(endpoint_statuses.count(), 2, "Finding should have two endpoint statuses")

        # Because the second finding is dynamic, update_endpoint_status() compares
        # the first finding's existing endpoint statuses against the second finding's
        # unsaved_endpoints. The second finding only has /app/dashboard, so /app/login
        # (which belongs to the first finding) is considered "no longer present" and
        # gets mitigated. This is arguably wrong for batch-created findings — both
        # endpoints came from the same report — but it is the current behavior.
        self.assertEqual(
            endpoint_statuses.filter(mitigated=False).count(),
            1,
            "One endpoint status should be active (/app/dashboard from the matched finding)",
        )
        self.assertEqual(
            endpoint_statuses.filter(mitigated=True).count(),
            1,
            "One endpoint status should be mitigated (/app/login — mitigated by update_endpoint_status "
            "because it is not in the second finding's endpoint list)",
        )


@override_settings(V3_FEATURE_LOCATIONS=True)
class TestReimportDuplicateFindingsLocationHandling(ReimportDuplicateFindingsTestBase):

    """Test that reimport handles locations correctly for duplicate findings in the same report."""

    def test_reimport_duplicate_findings_in_same_report_locations(self):
        """
        Reimporting a report with two findings that share the same hash_code must
        correctly create locations for both findings.

        The locations code does not use the to_attr prefetch that caused the
        AttributeError in the endpoint code path, so it does not crash. However,
        the same logical issue applies: update_location_status() compares the
        batch-created finding's locations against the second finding's unsaved
        locations, mitigating locations that are "no longer present".

        The scan file has two findings with different pluginIds, each with one
        unique location (https://localhost:9000/app/login and .../app/dashboard).
        With the overridden hash settings, both produce the same hash_code. The
        first finding is created as new and added to candidates. The second finding
        matches against it (a batch-created finding).
        """
        _test, _, len_new, len_closed, _, _, _ = self._reimport_with_overridden_hashcode()

        # The first finding is new (empty test, no candidates). The second finding
        # matches the first (same hash_code) so it is not counted as new.
        self.assertEqual(len_new, 1, "Reimport should create one new finding (second matches first)")
        self.assertEqual(len_closed, 0, "No findings should be closed")

        # Only one finding should exist — the second was matched to the first
        findings = Finding.objects.filter(test=self.test)
        self.assertEqual(findings.count(), 1, "Only one finding should exist after reimport")

        finding = findings.first()

        # The first finding (new, pluginId=90001) creates location for /app/login.
        # The second finding (pluginId=90002) matches the first via hash_code.
        # finding_post_processing runs for BOTH iterations, adding each finding's
        # locations to the (single) matched finding. So the finding ends up with
        # 2 location references pointing to 2 distinct Location objects.
        location_refs = LocationFindingReference.objects.filter(finding=finding)
        self.assertEqual(location_refs.count(), 2, "Finding should have two location references")

        # Same behavior as endpoints: update_location_status() compares the first
        # finding's existing location refs against the second finding's unsaved_locations.
        # The second finding only has /app/dashboard, so /app/login (which belongs to
        # the first finding) is considered "no longer present" and gets mitigated.
        # This is arguably wrong for batch-created findings — both locations came from
        # the same report — but it is the current behavior.
        self.assertEqual(
            location_refs.filter(status=FindingLocationStatus.Active).count(),
            1,
            "One location ref should be active (/app/dashboard from the matched finding)",
        )
        self.assertEqual(
            location_refs.filter(status=FindingLocationStatus.Mitigated).count(),
            1,
            "One location ref should be mitigated (/app/login — mitigated by update_location_status "
            "because it is not in the second finding's location list)",
        )
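
These tests can be run in isolation with DefectDojo's unit test runner (script name and flag follow the project's contributing docs; assumed unchanged here):

    ./run-unittest.sh --test-case unittests.test_reimport_prefetch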
