Skip to content

Commit ae864c6

Browse files
committed
Add option to exclude hosts/CIDRs from HTTP proxy
Use the radixtarget library for proxy exclusion lookups, consistent with how targets and blacklists are handled. Supports hostnames, IPs, CIDRs, and NO_PROXY conventions (*.domain, .domain, wildcard *).

- Add http_proxy_exclude config and --no-proxy CLI arg
- Export NO_PROXY environment variable for external tools
- Add tests for proxy exclusion and passthrough behavior
1 parent 8b02acb commit ae864c6

6 files changed

Lines changed: 150 additions & 2 deletions

File tree

bbot/core/helpers/web/engine.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import asyncio
55
import logging
66
import traceback
7+
from urllib.parse import urlparse
78
from socksio.exceptions import SOCKSError
89
from contextlib import asynccontextmanager
10+
from radixtarget import RadixTarget
911

1012
from bbot.core.engine import EngineServer
1113
from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain, truncate_string
@@ -36,6 +38,32 @@ def __init__(self, socket_path, target, config={}, debug=False):
3638
self.web_clients = {}
3739
self.web_client = self.AsyncClient(persist_cookies=False)
3840

41+
# proxy exclusion support
42+
self.has_proxy = bool(self.web_config.get("http_proxy", ""))
43+
proxy_exclusions = self.web_config.get("http_proxy_exclude", [])
44+
self.noproxy_web_clients = {}
45+
self.proxy_bypass_all = False
46+
if self.has_proxy and proxy_exclusions:
47+
normalized = []
48+
for pattern in proxy_exclusions:
49+
pattern = str(pattern).strip()
50+
if pattern == "*":
51+
self.proxy_bypass_all = True
52+
break
53+
# normalize NO_PROXY conventions for radixtarget
54+
# ".example.com" and "*.example.com" both mean "example.com + subdomains"
55+
if pattern.startswith("*."):
56+
pattern = pattern[2:]
57+
elif pattern.startswith("."):
58+
pattern = pattern[1:]
59+
if pattern:
60+
normalized.append(pattern)
61+
self.proxy_exclusion_target = RadixTarget(*normalized) if normalized else RadixTarget()
62+
self.noproxy_web_client = self._AsyncClient_noproxy(persist_cookies=False)
63+
else:
64+
self.proxy_exclusion_target = RadixTarget()
65+
self.noproxy_web_client = None
66+
3967
def AsyncClient(self, *args, **kwargs):
4068
# cache by retries to prevent unwanted accumulation of clients
4169
# (they are not garbage-collected)
@@ -49,12 +77,44 @@ def AsyncClient(self, *args, **kwargs):
4977
self.web_clients[client.retries] = client
5078
return client
5179

80+
def _AsyncClient_noproxy(self, *args, **kwargs):
    """Create/cache a BBOTAsyncClient with proxy disabled, for excluded hosts.

    Mirrors AsyncClient()'s cache-by-retries pattern so no-proxy clients are
    not accumulated (they are not garbage-collected).
    """
    retries = kwargs.get("retries", 1)
    try:
        return self.noproxy_web_clients[retries]
    except KeyError:
        from .client import BBOTAsyncClient

        # clone the config with the proxy stripped out
        noproxy_config = dict(self.config)
        noproxy_web = dict(noproxy_config.get("web", {}))
        noproxy_web["http_proxy"] = None
        noproxy_config["web"] = noproxy_web
        client = BBOTAsyncClient.from_config(noproxy_config, self.target, *args, **kwargs)
        # Cache under BOTH the requested key and the client's effective retry
        # count: from_config may resolve a different retries value (e.g. from
        # web.http_retries), and caching only under client.retries would make
        # every future lookup with the default key miss — creating (and
        # leaking) a fresh client on every call.
        self.noproxy_web_clients[retries] = client
        self.noproxy_web_clients[client.retries] = client
        return client
95+
96+
def _get_client_for_url(self, url, client=None):
97+
"""Return the appropriate client based on proxy exclusion rules.
98+
99+
If no explicit client is provided and the URL matches an exclusion pattern,
100+
returns the no-proxy client. Otherwise returns the given client or default.
101+
"""
102+
if client is not None:
103+
return client
104+
if self.noproxy_web_client is not None and url:
105+
if self.proxy_bypass_all:
106+
return self.noproxy_web_client
107+
hostname = urlparse(str(url)).hostname
108+
if hostname and self.proxy_exclusion_target.get(hostname):
109+
return self.noproxy_web_client
110+
return self.web_client
111+
52112
async def request(self, *args, **kwargs):
53113
raise_error = kwargs.pop("raise_error", False)
54114
# TODO: use this
55115
cache_for = kwargs.pop("cache_for", None) # noqa
56116

57-
client = kwargs.get("client", self.web_client)
117+
explicit_client = kwargs.pop("client", None)
58118

59119
# allow vs follow, httpx why??
60120
allow_redirects = kwargs.pop("allow_redirects", None)
@@ -79,6 +139,8 @@ async def request(self, *args, **kwargs):
79139

80140
if client_kwargs:
81141
client = self.AsyncClient(**client_kwargs)
142+
else:
143+
client = self._get_client_for_url(url, explicit_client)
82144

83145
try:
84146
async with self._acatch(url, raise_error):
@@ -144,7 +206,8 @@ async def stream_request(self, url, **kwargs):
144206
chunk_size = 8192
145207
chunks = []
146208

147-
async with self._acatch(url, raise_error=True), self.web_client.stream(url=url, **kwargs) as response:
209+
stream_client = self._get_client_for_url(url)
210+
async with self._acatch(url, raise_error=True), stream_client.stream(url=url, **kwargs) as response:
148211
agen = response.aiter_bytes(chunk_size=chunk_size)
149212
async for chunk in agen:
150213
_chunk_size = len(chunk)

bbot/defaults.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ dns:
8484
web:
8585
# HTTP proxy
8686
http_proxy:
87+
# Hosts/CIDRs to exclude from HTTP proxy (NO_PROXY equivalent)
88+
# Examples: ["localhost", "*.internal.corp", "10.0.0.0/8", "elastic.mycompany.com"]
89+
http_proxy_exclude: []
8790
# Web user-agent
8891
user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97
8992
# Suffix to append to user-agent (e.g. for tracking or identification)

bbot/scanner/preset/args.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ def preset_from_args(self):
177177
if self.parsed.proxy:
178178
args_preset.core.merge_custom({"web": {"http_proxy": self.parsed.proxy}})
179179

180+
if self.parsed.no_proxy:
181+
args_preset.core.merge_custom({"web": {"http_proxy_exclude": self.parsed.no_proxy}})
182+
180183
if self.parsed.custom_headers:
181184
args_preset.core.merge_custom({"web": {"http_headers": self.parsed.custom_headers}})
182185

@@ -372,6 +375,13 @@ def create_parser(self, *args, **kwargs):
372375
misc = p.add_argument_group(title="Misc")
373376
misc.add_argument("--version", action="store_true", help="show BBOT version and exit")
374377
misc.add_argument("--proxy", help="Use this proxy for all HTTP requests", metavar="HTTP_PROXY")
378+
misc.add_argument(
379+
"--no-proxy",
380+
nargs="+",
381+
default=[],
382+
help="Exclude these hosts from proxy (e.g. localhost *.internal.corp 10.0.0.0/8)",
383+
metavar="HOST",
384+
)
375385
misc.add_argument(
376386
"-H",
377387
"--custom-headers",

bbot/scanner/preset/environ.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ def prepare(self):
125125
environ.pop("HTTP_PROXY", None)
126126
environ.pop("HTTPS_PROXY", None)
127127

128+
# handle proxy exclusions (NO_PROXY)
129+
http_proxy_exclude = self.preset.config.get("web", {}).get("http_proxy_exclude", [])
130+
if http_proxy_exclude:
131+
environ["NO_PROXY"] = ",".join(str(x) for x in http_proxy_exclude)
132+
else:
133+
environ.pop("NO_PROXY", None)
134+
128135
# ssl verification
129136
import urllib3
130137

bbot/scanner/scanner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ def __init__(
232232
max_redirects = web_config.get("http_max_redirects", 5)
233233
self.web_max_redirects = max(max_redirects, self.web_spider_distance)
234234
self.http_proxy = web_config.get("http_proxy", "")
235+
self.http_proxy_exclude = web_config.get("http_proxy_exclude", [])
235236
self.http_timeout = web_config.get("http_timeout", 10)
236237
self.httpx_timeout = web_config.get("httpx_timeout", 5)
237238
self.http_retries = web_config.get("http_retries", 1)

bbot/test/test_step_1/test_web.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,70 @@ async def test_http_proxy(bbot_scanner, bbot_httpserver, proxy_server):
425425
await scan._cleanup()
426426

427427

428+
@pytest.mark.asyncio
async def test_http_proxy_exclude(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that requests to excluded hosts bypass the proxy."""
    endpoint = "/test_http_proxy_exclude"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("proxy_exclude_works")

    # Exclude 127.0.0.1 from proxy
    proxy_address = f"http://127.0.0.1:{proxy_server.server_address[1]}"
    web_config = {"http_proxy": proxy_address, "http_proxy_exclude": ["127.0.0.1"]}
    scan = bbot_scanner("127.0.0.1", config={"web": web_config})

    await scan._prep()

    # track requests seen by the proxy so we can prove it was bypassed
    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # Request should NOT go through proxy
    assert len(proxy_server.RequestHandlerClass.urls) == 0, "Request should have bypassed proxy but went through it"
    assert response.status_code == 200 and response.text == "proxy_exclude_works"

    await scan._cleanup()
457+
458+
459+
@pytest.mark.asyncio
async def test_http_proxy_exclude_passthrough(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that non-excluded hosts still go through the proxy."""
    endpoint = "/test_proxy_passthrough"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("passthrough_works")

    # Exclude a different host, not the one we're requesting
    proxy_address = f"http://127.0.0.1:{proxy_server.server_address[1]}"
    web_config = {"http_proxy": proxy_address, "http_proxy_exclude": ["10.0.0.0/8"]}
    scan = bbot_scanner("127.0.0.1", config={"web": web_config})

    await scan._prep()

    # track requests seen by the proxy so we can prove it was used
    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # Request SHOULD go through proxy (127.0.0.1 not in exclusion list)
    assert len(proxy_server.RequestHandlerClass.urls) == 1, (
        f"Request to {url} should have gone through proxy but didn't"
    )
    assert response.status_code == 200 and response.text == "passthrough_works"

    await scan._cleanup()
490+
491+
428492
@pytest.mark.asyncio
429493
async def test_http_ssl(bbot_scanner, bbot_httpserver_ssl):
430494
endpoint = "/test_http_ssl"

0 commit comments

Comments
 (0)