From 339a1e3b646dc60fa95e910c469a05c546ec517d Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 8 Apr 2025 10:23:11 -0400 Subject: [PATCH 1/5] logging improvements + fix mobile app error --- bbot/core/config/logger.py | 18 +++++----------- bbot/logger.py | 44 ++++++++++++++++++++++++++++++++++---- bbot/scanner/scanner.py | 8 +++++-- bbot/scanner/target.py | 6 +++--- 4 files changed, 54 insertions(+), 22 deletions(-) diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 56bde9a9df..2b6e7383d2 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -60,7 +60,6 @@ def __init__(self, core): self._loggers = None self._log_handlers = None self._log_level = None - self.root_logger = logging.getLogger() self.core_logger = logging.getLogger("bbot") self.core = core @@ -83,14 +82,6 @@ def cleanup_logging(self): with suppress(Exception): self.queue_handler.close() - # Clean root logger - root_logger = logging.getLogger() - for handler in list(root_logger.handlers): - with suppress(Exception): - root_logger.removeHandler(handler) - with suppress(Exception): - handler.close() - # Clean all other loggers for logger in logging.Logger.manager.loggerDict.values(): if hasattr(logger, "handlers"): # Logger, not PlaceHolder @@ -111,8 +102,7 @@ def setup_queue_handler(self, logging_queue=None, log_level=logging.DEBUG): self.queue = logging_queue self.queue_handler = logging.handlers.QueueHandler(logging_queue) - self.root_logger.addHandler(self.queue_handler) - + self.core_logger.addHandler(self.queue_handler) self.core_logger.setLevel(log_level) # disable asyncio logging for child processes if not SHARED_INTERPRETER_STATE.is_main_process: @@ -216,10 +206,12 @@ def log_handlers(self): error_and_exit(f"Failure creating or error writing to BBOT logs directory ({log_dir})") # Main log file (compressed) - main_handler = GzipRotatingFileHandler(f"{log_dir}/bbot.log", when="d", interval=1, backupCount=14) + main_handler = GzipRotatingFileHandler(f"{log_dir}/bbot.log", maxBytes=1024 * 1024 * 100, backupCount=100) # Separate log file for debugging (compressed) - debug_handler = GzipRotatingFileHandler(f"{log_dir}/bbot.debug.log", when="d", interval=1, backupCount=14) + debug_handler = GzipRotatingFileHandler( + f"{log_dir}/bbot.debug.log", maxBytes=1024 * 1024 * 100, backupCount=100 + ) # Log to stderr stderr_handler = logging.StreamHandler(sys.stderr) diff --git a/bbot/logger.py b/bbot/logger.py index 9334741460..5073d7379d 100644 --- a/bbot/logger.py +++ b/bbot/logger.py @@ -1,3 +1,4 @@ +import os import sys import logging.handlers @@ -53,9 +54,44 @@ def log_to_stderr(msg, level="INFO", logname=True): print(msg, file=sys.stderr) -# Create a compressed file handler for logs -class GzipRotatingFileHandler(logging.handlers.TimedRotatingFileHandler): - def _open(self): +class GzipRotatingFileHandler(logging.handlers.RotatingFileHandler): + """ + A rotating file handler that compresses rotated files with gzip. + Checks file size only periodically to improve performance. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._msg_count = 0 + self._check_interval = 1000 # Check size every 1000 messages + + def rotation_filename(self, default_name): + """ + Modify the rotated filename to include .gz extension + """ + return default_name + ".gz" + + def rotate(self, source, dest): + """ + Compress the source file and move it to the destination. + """ import gzip - return gzip.open(f"{self.baseFilename}.gz", mode="at", encoding=self.encoding) + with open(source, "rb") as f_in: + with gzip.open(dest, "wb") as f_out: + f_out.writelines(f_in) + os.remove(source) + + def emit(self, record): + """ + Emit a record, checking for rollover only periodically using modulo. + """ + self._msg_count += 1 + + # Only check for rollover periodically to save compute + if self._msg_count % self._check_interval == 0: + if self.shouldRollover(record): + self.doRollover() + + # Continue with normal emit process + super().emit(record) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index ce2592c173..7cf923b5ec 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -1229,9 +1229,13 @@ def log_level(self): def _log_handlers(self): if self.__log_handlers is None: self.helpers.mkdir(self.home) - main_handler = GzipRotatingFileHandler(str(self.home / "scan.log"), when="d", interval=1, backupCount=14) + main_handler = GzipRotatingFileHandler( + str(self.home / "scan.log"), maxBytes=1024 * 1024 * 100, backupCount=100 + ) main_handler.addFilter(lambda x: x.levelno != logging.TRACE and x.levelno >= logging.VERBOSE) - debug_handler = GzipRotatingFileHandler(str(self.home / "debug.log"), when="d", interval=1, backupCount=14) + debug_handler = GzipRotatingFileHandler( + str(self.home / "debug.log"), maxBytes=1024 * 1024 * 100, backupCount=100 + ) debug_handler.addFilter(lambda x: x.levelno >= logging.DEBUG) self.__log_handlers = [main_handler, debug_handler] return self.__log_handlers diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 68714205a8..528986ea93 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -269,9 +269,9 @@ def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False, s @property def json(self): return { - "seeds": sorted([e.data for e in self.seeds]), - "whitelist": sorted([e.data for e in self.whitelist]), - "blacklist": sorted([e.data for e in self.blacklist]), + "seeds": sorted([e for e in self.seeds.inputs]), + "whitelist": sorted([str(e.data) for e in self.whitelist]), + "blacklist": sorted([str(e.data) for e in self.blacklist]), "strict_scope": self.strict_scope, "hash": self.hash.hex(), "seed_hash": self.seeds.hash.hex(), From 0e868c119ac192664f11f53998baaa28a3fe7917 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 8 Apr 2025 10:27:18 -0400 Subject: [PATCH 2/5] fix tests --- bbot/test/bbot_fixtures.py | 19 ------------------ bbot/test/test_step_1/test_cli.py | 12 +++++------ bbot/test/test_step_1/test_python_api.py | 20 +++++++++---------- .../module_tests/test_module_excavate.py | 2 +- .../module_tests/test_module_nuclei.py | 6 ++---- 5 files changed, 19 insertions(+), 40 deletions(-) diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 6b9abd1f58..6b773e8596 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -50,25 +50,6 @@ def tempapkfile(): return apk_file -def read_gzipped_file(file_path): - """ - Read and decompress a gzipped file, tolerating missing end markers. - - Args: - file_path: Path to the gzipped file - - Returns: - The decompressed content as a string - """ - with open(file_path, "rb") as f: - data = f.read() - decompressor = zlib.decompressobj( - 16 + zlib.MAX_WBITS - ) # This is needed because the file doesn't have an end marker - content = decompressor.decompress(data).decode("utf-8", errors="ignore") - return content - - @pytest.fixture def clean_default_config(monkeypatch): clean_config = OmegaConf.merge( diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index e74267a9c1..0e5007d93f 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -130,9 +130,9 @@ async def test_cli_scan(monkeypatch): assert ip_success and dns_success, "IP_ADDRESS and/or DNS_NAME are not present in output.txt" # Check for gzipped scan log file - scan_log_gz = scan_home / "scan.log.gz" - assert scan_log_gz.is_file(), "scan.log.gz not found" - assert "[INFO]" in read_gzipped_file(scan_log_gz) + scan_log = scan_home / "scan.log" + assert scan_log.is_file(), "scan.log not found" + assert "[INFO]" in open(scan_log).read() @pytest.mark.asyncio @@ -202,9 +202,9 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): assert "[SCAN]" in open(scan_dir / "output.txt").read() # Check for gzipped scan log file - scan_log_gz = scan_dir / "scan.log.gz" - assert scan_log_gz.is_file(), "scan.log.gz not found" - assert "[INFO]" in read_gzipped_file(scan_log_gz) + scan_log = scan_dir / "scan.log" + assert scan_log.is_file(), "scan.log not found" + assert "[INFO]" in open(scan_log).read() shutil.rmtree(output_dir) # list module options diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 48751b29d3..eaa9636b1c 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -17,28 +17,28 @@ async def test_python_api(): scan_home = scan2.helpers.scans_dir / "python_api_test" out_file = scan_home / "output.json" assert list(scan2.helpers.read_file(out_file)) - scan_log = scan_home / "scan.log.gz" - debug_log = scan_home / "debug.log.gz" + scan_log = scan_home / "scan.log" + debug_log = scan_home / "debug.log" assert scan_log.is_file() - assert "python_api_test" in read_gzipped_file(scan_log) + assert "python_api_test" in open(scan_log).read() assert debug_log.is_file() - assert "python_api_test" in read_gzipped_file(debug_log) + assert "python_api_test" in open(debug_log).read() scan3 = Scanner("127.0.0.1", output_modules=["json"], scan_name="scan_logging_test") await scan3.async_start_without_generator() - assert "scan_logging_test" not in read_gzipped_file(scan_log) - assert "scan_logging_test" not in read_gzipped_file(debug_log) + assert "scan_logging_test" not in open(scan_log).read() + assert "scan_logging_test" not in open(debug_log).read() scan_home = scan3.helpers.scans_dir / "scan_logging_test" out_file = scan_home / "output.json" assert list(scan3.helpers.read_file(out_file)) - scan_log = scan_home / "scan.log.gz" - debug_log = scan_home / "debug.log.gz" + scan_log = scan_home / "scan.log" + debug_log = scan_home / "debug.log" assert scan_log.is_file() assert debug_log.is_file() - assert "scan_logging_test" in read_gzipped_file(scan_log) - assert "scan_logging_test" in read_gzipped_file(debug_log) + assert "scan_logging_test" in open(scan_log).read() + assert "scan_logging_test" in open(debug_log).read() # make sure config loads properly bbot_home = "/tmp/.bbot_python_api_test" diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index 29da52a88b..20fd4fe366 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -1034,7 +1034,7 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data}) def check(self, module_test, events): - debug_log_content = read_gzipped_file(module_test.scan.home / "debug.log.gz") + debug_log_content = open(module_test.scan.home / "debug.log").read() # make sure our logging is working assert "Setting scan status to STARTING" in debug_log_content # make sure we don't have any URL validation errors diff --git a/bbot/test/test_step_2/module_tests/test_module_nuclei.py b/bbot/test/test_step_2/module_tests/test_module_nuclei.py index 0d1537b6f2..de08264a5a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_nuclei.py +++ b/bbot/test/test_step_2/module_tests/test_module_nuclei.py @@ -140,8 +140,7 @@ async def setup_before_prep(self, module_test): module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): - assert "-retries 0" in read_gzipped_file(module_test.scan.home / "debug.log.gz") - + assert "-retries 0" in open(module_test.scan.home / "debug.log").read() class TestNucleiRetriesCustom(TestNucleiRetries): config_overrides = { @@ -150,8 +149,7 @@ class TestNucleiRetriesCustom(TestNucleiRetries): } def check(self, module_test, events): - assert "-retries 1" in read_gzipped_file(module_test.scan.home / "debug.log.gz") - + assert "-retries 1" in open(module_test.scan.home / "debug.log").read() class TestNucleiCustomHeaders(TestNucleiManual): custom_headers = {"testheader1": "test1", "testheader2": "test2"} From fded823872bcb3a4383a005546dfadab1e1f6fc3 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 8 Apr 2025 10:27:26 -0400 Subject: [PATCH 3/5] ruffed --- bbot/test/test_step_2/module_tests/test_module_nuclei.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bbot/test/test_step_2/module_tests/test_module_nuclei.py b/bbot/test/test_step_2/module_tests/test_module_nuclei.py index de08264a5a..073f455ad3 100644 --- a/bbot/test/test_step_2/module_tests/test_module_nuclei.py +++ b/bbot/test/test_step_2/module_tests/test_module_nuclei.py @@ -142,6 +142,7 @@ async def setup_before_prep(self, module_test): def check(self, module_test, events): assert "-retries 0" in open(module_test.scan.home / "debug.log").read() + class TestNucleiRetriesCustom(TestNucleiRetries): config_overrides = { "interactsh_disable": True, @@ -151,6 +152,7 @@ class TestNucleiRetriesCustom(TestNucleiRetries): def check(self, module_test, events): assert "-retries 1" in open(module_test.scan.home / "debug.log").read() + class TestNucleiCustomHeaders(TestNucleiManual): custom_headers = {"testheader1": "test1", "testheader2": "test2"} config_overrides = TestNucleiManual.config_overrides From cd119879772786676081b725d5e7f8024c7c1c37 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 9 Apr 2025 16:20:25 -0400 Subject: [PATCH 4/5] fix tests --- bbot/scanner/target.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 528986ea93..68714205a8 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -269,9 +269,9 @@ def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False, s @property def json(self): return { - "seeds": sorted([e for e in self.seeds.inputs]), - "whitelist": sorted([str(e.data) for e in self.whitelist]), - "blacklist": sorted([str(e.data) for e in self.blacklist]), + "seeds": sorted([e.data for e in self.seeds]), + "whitelist": sorted([e.data for e in self.whitelist]), + "blacklist": sorted([e.data for e in self.blacklist]), "strict_scope": self.strict_scope, "hash": self.hash.hex(), "seed_hash": self.seeds.hash.hex(), From fb7ed05eb8b968bcda5368ab03b91c7ae3327521 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 10 Apr 2025 10:14:39 -0400 Subject: [PATCH 5/5] fix test --- bbot/test/test_step_2/module_tests/test_module_nuclei.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_nuclei.py b/bbot/test/test_step_2/module_tests/test_module_nuclei.py index 073f455ad3..24638b4b86 100644 --- a/bbot/test/test_step_2/module_tests/test_module_nuclei.py +++ b/bbot/test/test_step_2/module_tests/test_module_nuclei.py @@ -101,7 +101,7 @@ async def setup_before_prep(self, module_test): def check(self, module_test, events): assert any(e.type == "TECHNOLOGY" and "apache" in e.data["technology"].lower() for e in events) - assert "Using Interactsh Server" not in read_gzipped_file(module_test.scan.home / "debug.log.gz") + assert "Using Interactsh Server" not in open(module_test.scan.home / "debug.log").read() class TestNucleiBudget(TestNucleiManual):