|
5 | 5 | import inspect |
6 | 6 | import regex as re |
7 | 7 | from pathlib import Path |
8 | | -from bbot.errors import ExcavateError |
| 8 | +from bbot.errors import ExcavateError, ValidationError |
9 | 9 | import bbot.core.helpers.regexes as bbot_regexes |
10 | 10 | from bbot.modules.base import BaseInterceptModule |
11 | 11 | from bbot.modules.internal.base import BaseInternalModule |
@@ -622,14 +622,15 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte |
622 | 622 | base_url += f"?{event.parsed_url.query}" |
623 | 623 | url = urljoin(base_url, endpoint) |
624 | 624 |
|
| 625 | + try: |
| 626 | + # Validate the URL before using it |
| 627 | + parsed_url = self.excavate.helpers.validators.validate_url_parsed(url) |
| 628 | + except (ValidationError, ValueError) as e: |
| 629 | + self.excavate.debug(f"Invalid URL [{url}]: {e}") |
| 630 | + continue |
| 631 | + |
625 | 632 | if self.excavate.helpers.validate_parameter(parameter_name, parameter_type): |
626 | 633 | if self.excavate.in_bl(parameter_name) is False: |
627 | | - parsed_url = urlparse(url) |
628 | | - if not parsed_url.hostname: |
629 | | - self.excavate.warning( |
630 | | - f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname" |
631 | | - ) |
632 | | - continue |
633 | 634 | description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)" |
634 | 635 | data = { |
635 | 636 | "host": parsed_url.hostname, |
@@ -848,45 +849,51 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte |
848 | 849 | urls_found = 0 |
849 | 850 | final_url = "" |
850 | 851 | for url_str in results: |
851 | | - if identifier == "url_full": |
852 | | - if not await self.helpers.re.search(self.full_url_regex, url_str): |
| 852 | + try: |
| 853 | + if identifier == "url_full": |
| 854 | + if not await self.helpers.re.search(self.full_url_regex, url_str): |
| 855 | + self.excavate.debug( |
| 856 | + f"Rejecting potential full URL [{url_str}] as did not match full_url_regex" |
| 857 | + ) |
| 858 | + continue |
| 859 | + final_url = url_str |
| 860 | + self.excavate.debug(f"Discovered Full URL [{final_url}]") |
| 861 | + |
| 862 | + elif identifier == "url_attr" and hasattr(event, "parsed_url"): |
| 863 | + m = await self.helpers.re.search(self.tag_attribute_regex, url_str) |
| 864 | + if not m: |
| 865 | + self.excavate.debug( |
| 866 | + f"Rejecting potential attribute URL [{url_str}] as did not match tag_attribute_regex" |
| 867 | + ) |
| 868 | + continue |
| 869 | + unescaped_url = html.unescape(m.group(1)) |
| 870 | + source_url = event.parsed_url.geturl() |
| 871 | + final_url = urldefrag(urljoin(source_url, unescaped_url)).url |
| 872 | + if not await self.helpers.re.search(self.full_url_regex_strict, final_url): |
| 873 | + self.excavate.debug( |
| 874 | + f"Rejecting reconstructed URL [{final_url}] as did not match full_url_regex_strict" |
| 875 | + ) |
| 876 | + continue |
853 | 877 | self.excavate.debug( |
854 | | - f"Rejecting potential full URL [{url_str}] as did not match full_url_regex" |
| 878 | + f"Reconstructed Full URL [{final_url}] from extracted relative URL [{unescaped_url}] " |
855 | 879 | ) |
856 | | - continue |
857 | | - final_url = url_str |
858 | 880 |
|
859 | | - self.excavate.debug(f"Discovered Full URL [{final_url}]") |
860 | | - elif identifier == "url_attr" and hasattr(event, "parsed_url"): |
861 | | - m = await self.helpers.re.search(self.tag_attribute_regex, url_str) |
862 | | - if not m: |
863 | | - self.excavate.debug( |
864 | | - f"Rejecting potential attribute URL [{url_str}] as did not match tag_attribute_regex" |
| 881 | + if final_url: |
| 882 | + # Validate the URL before using it |
| 883 | + self.excavate.helpers.validators.validate_url_parsed(final_url) |
| 884 | + if self.excavate.scan.in_scope(final_url): |
| 885 | + urls_found += 1 |
| 886 | + await self.report( |
| 887 | + final_url, |
| 888 | + event, |
| 889 | + yara_rule_settings, |
| 890 | + discovery_context, |
| 891 | + event_type="URL_UNVERIFIED", |
| 892 | + urls_found=urls_found, |
865 | 893 | ) |
866 | | - continue |
867 | | - unescaped_url = html.unescape(m.group(1)) |
868 | | - source_url = event.parsed_url.geturl() |
869 | | - final_url = urldefrag(urljoin(source_url, unescaped_url)).url |
870 | | - if not await self.helpers.re.search(self.full_url_regex_strict, final_url): |
871 | | - self.excavate.debug( |
872 | | - f"Rejecting reconstructed URL [{final_url}] as did not match full_url_regex_strict" |
873 | | - ) |
874 | | - continue |
875 | | - self.excavate.debug( |
876 | | - f"Reconstructed Full URL [{final_url}] from extracted relative URL [{unescaped_url}] " |
877 | | - ) |
878 | | - |
879 | | - if final_url: |
880 | | - if self.excavate.scan.in_scope(final_url): |
881 | | - urls_found += 1 |
882 | | - await self.report( |
883 | | - final_url, |
884 | | - event, |
885 | | - yara_rule_settings, |
886 | | - discovery_context, |
887 | | - event_type="URL_UNVERIFIED", |
888 | | - urls_found=urls_found, |
889 | | - ) |
| 894 | + except (ValidationError, ValueError) as e: |
| 895 | + self.excavate.debug(f"Invalid URL [{url_str if not final_url else final_url}]: {e}") |
| 896 | + continue |
890 | 897 |
|
891 | 898 | async def report_prep(self, event_data, event_type, event, tags, **kwargs): |
892 | 899 | event_draft = self.excavate.make_event(event_data, event_type, parent=event) |
@@ -1114,7 +1121,10 @@ async def search(self, data, event, content_type, discovery_context="HTTP respon |
1114 | 1121 |
|
1115 | 1122 | # Check if rule processing function exists |
1116 | 1123 | if rule_name in self.yara_preprocess_dict: |
1117 | | - await self.yara_preprocess_dict[rule_name](result, event, discovery_context) |
| 1124 | + try: |
| 1125 | + await self.yara_preprocess_dict[rule_name](result, event, discovery_context) |
| 1126 | + except ValidationError as e: |
| 1127 | + self.debug(f"ValidationError in rule {rule_name} for result {result}: {e}") |
1118 | 1128 | else: |
1119 | 1129 | self.hugewarning(f"YARA Rule {rule_name} not found in pre-compiled rules") |
1120 | 1130 |
|
|
0 commit comments