Skip to content

Commit d48dbfc

Browse files
authored
Merge pull request #2433 from blacklanternsecurity/lightfuzz-fp-tweaks
Excavate unhandled error / Lightfuzz FP Fix
2 parents 0c78fef + b20126e commit d48dbfc

3 files changed

Lines changed: 68 additions & 44 deletions

File tree

bbot/modules/internal/excavate.py

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import inspect
66
import regex as re
77
from pathlib import Path
8-
from bbot.errors import ExcavateError
8+
from bbot.errors import ExcavateError, ValidationError
99
import bbot.core.helpers.regexes as bbot_regexes
1010
from bbot.modules.base import BaseInterceptModule
1111
from bbot.modules.internal.base import BaseInternalModule
@@ -622,14 +622,15 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte
622622
base_url += f"?{event.parsed_url.query}"
623623
url = urljoin(base_url, endpoint)
624624

625+
try:
626+
# Validate the URL before using it
627+
parsed_url = self.excavate.helpers.validators.validate_url_parsed(url)
628+
except (ValidationError, ValueError) as e:
629+
self.excavate.debug(f"Invalid URL [{url}]: {e}")
630+
continue
631+
625632
if self.excavate.helpers.validate_parameter(parameter_name, parameter_type):
626633
if self.excavate.in_bl(parameter_name) is False:
627-
parsed_url = urlparse(url)
628-
if not parsed_url.hostname:
629-
self.excavate.warning(
630-
f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname"
631-
)
632-
continue
633634
description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)"
634635
data = {
635636
"host": parsed_url.hostname,
@@ -848,45 +849,51 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte
848849
urls_found = 0
849850
final_url = ""
850851
for url_str in results:
851-
if identifier == "url_full":
852-
if not await self.helpers.re.search(self.full_url_regex, url_str):
852+
try:
853+
if identifier == "url_full":
854+
if not await self.helpers.re.search(self.full_url_regex, url_str):
855+
self.excavate.debug(
856+
f"Rejecting potential full URL [{url_str}] as did not match full_url_regex"
857+
)
858+
continue
859+
final_url = url_str
860+
self.excavate.debug(f"Discovered Full URL [{final_url}]")
861+
862+
elif identifier == "url_attr" and hasattr(event, "parsed_url"):
863+
m = await self.helpers.re.search(self.tag_attribute_regex, url_str)
864+
if not m:
865+
self.excavate.debug(
866+
f"Rejecting potential attribute URL [{url_str}] as did not match tag_attribute_regex"
867+
)
868+
continue
869+
unescaped_url = html.unescape(m.group(1))
870+
source_url = event.parsed_url.geturl()
871+
final_url = urldefrag(urljoin(source_url, unescaped_url)).url
872+
if not await self.helpers.re.search(self.full_url_regex_strict, final_url):
873+
self.excavate.debug(
874+
f"Rejecting reconstructed URL [{final_url}] as did not match full_url_regex_strict"
875+
)
876+
continue
853877
self.excavate.debug(
854-
f"Rejecting potential full URL [{url_str}] as did not match full_url_regex"
878+
f"Reconstructed Full URL [{final_url}] from extracted relative URL [{unescaped_url}] "
855879
)
856-
continue
857-
final_url = url_str
858880

859-
self.excavate.debug(f"Discovered Full URL [{final_url}]")
860-
elif identifier == "url_attr" and hasattr(event, "parsed_url"):
861-
m = await self.helpers.re.search(self.tag_attribute_regex, url_str)
862-
if not m:
863-
self.excavate.debug(
864-
f"Rejecting potential attribute URL [{url_str}] as did not match tag_attribute_regex"
881+
if final_url:
882+
# Validate the URL before using it
883+
self.excavate.helpers.validators.validate_url_parsed(final_url)
884+
if self.excavate.scan.in_scope(final_url):
885+
urls_found += 1
886+
await self.report(
887+
final_url,
888+
event,
889+
yara_rule_settings,
890+
discovery_context,
891+
event_type="URL_UNVERIFIED",
892+
urls_found=urls_found,
865893
)
866-
continue
867-
unescaped_url = html.unescape(m.group(1))
868-
source_url = event.parsed_url.geturl()
869-
final_url = urldefrag(urljoin(source_url, unescaped_url)).url
870-
if not await self.helpers.re.search(self.full_url_regex_strict, final_url):
871-
self.excavate.debug(
872-
f"Rejecting reconstructed URL [{final_url}] as did not match full_url_regex_strict"
873-
)
874-
continue
875-
self.excavate.debug(
876-
f"Reconstructed Full URL [{final_url}] from extracted relative URL [{unescaped_url}] "
877-
)
878-
879-
if final_url:
880-
if self.excavate.scan.in_scope(final_url):
881-
urls_found += 1
882-
await self.report(
883-
final_url,
884-
event,
885-
yara_rule_settings,
886-
discovery_context,
887-
event_type="URL_UNVERIFIED",
888-
urls_found=urls_found,
889-
)
894+
except (ValidationError, ValueError) as e:
895+
self.excavate.debug(f"Invalid URL [{url_str if not final_url else final_url}]: {e}")
896+
continue
890897

891898
async def report_prep(self, event_data, event_type, event, tags, **kwargs):
892899
event_draft = self.excavate.make_event(event_data, event_type, parent=event)
@@ -1114,7 +1121,10 @@ async def search(self, data, event, content_type, discovery_context="HTTP respon
11141121

11151122
# Check if rule processing function exists
11161123
if rule_name in self.yara_preprocess_dict:
1117-
await self.yara_preprocess_dict[rule_name](result, event, discovery_context)
1124+
try:
1125+
await self.yara_preprocess_dict[rule_name](result, event, discovery_context)
1126+
except ValidationError as e:
1127+
self.debug(f"ValidationError in rule {rule_name} for result {result}: {e}")
11181128
else:
11191129
self.hugewarning(f"YARA Rule {rule_name} not found in pre-compiled rules")
11201130

bbot/modules/lightfuzz/submodules/serial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class serial(BaseLightfuzz):
2222
CONTROL_PAYLOAD_PHP_RAW = "z:0:{}"
2323

2424
BASE64_SERIALIZATION_PAYLOADS = {
25-
"php_base64": "YTowOnt9",
25+
"php_base64": "YToxOntpOjA7aToxO30=",
2626
"java_base64": "rO0ABXNyABFqYXZhLmxhbmcuQm9vbGVhbs0gcoDVnPruAgABWgAFdmFsdWV4cAA=",
2727
"java_base64_string_error": "rO0ABXQABHRlc3Q=",
2828
"java_base64_OptionalDataException": "rO0ABXcEAAAAAAEAAAABc3IAEGphdmEudXRpbC5IYXNoTWFwAAAAAAAAAAECAAJMAARrZXkxYgABAAAAAAAAAAJ4cHcBAAAAB3QABHRlc3Q=",

bbot/test/test_step_2/module_tests/test_module_excavate.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,3 +1418,17 @@ def check(self, module_test, events):
14181418

14191419
url_events = [e for e in events if e.type == "URL_UNVERIFIED"]
14201420
assert sorted([e.data for e in url_events]) == sorted(["https://ssl/", "http://127.0.0.1:8888/"])
1421+
1422+
1423+
class TestExcavateURL_InvalidPort(TestExcavate):
1424+
modules_overrides = ["excavate", "httpx", "hunt"]
1425+
1426+
async def setup_before_prep(self, module_test):
1427+
# Test URL with invalid port (greater than 65535)
1428+
module_test.httpserver.expect_request("/").respond_with_data(
1429+
'<div><img loading="lazy" src="https://asdffoo.test.notreal:9212952841/whatever.jpg" width="576" height="382" alt="...." /></div>'
1430+
)
1431+
1432+
def check(self, module_test, events):
1433+
# Verify we got the hostname
1434+
assert any(e.data == "asdffoo.test.notreal" for e in events)

0 commit comments

Comments (0)