Skip to content

Commit ae864c6

Browse files
committed
Add option to exclude hosts/CIDRs from HTTP proxy
Use the radixtarget library for proxy exclusion lookups, consistent with how targets and blacklists are handled. Supports hostnames, IPs, CIDRs, and NO_PROXY conventions (*.domain, .domain, wildcard *).

- Add http_proxy_exclude config and --no-proxy CLI arg
- Export NO_PROXY environment variable for external tools
- Add tests for proxy exclusion and passthrough behavior
1 parent 8b02acb commit ae864c6

6 files changed

Lines changed: 150 additions & 2 deletions

File tree

bbot/core/helpers/web/engine.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import asyncio
55
import logging
66
import traceback
7+
from urllib.parse import urlparse
78
from socksio.exceptions import SOCKSError
89
from contextlib import asynccontextmanager
10+
from radixtarget import RadixTarget
911

1012
from bbot.core.engine import EngineServer
1113
from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain, truncate_string
@@ -36,6 +38,32 @@ def __init__(self, socket_path, target, config={}, debug=False):
3638
self.web_clients = {}
3739
self.web_client = self.AsyncClient(persist_cookies=False)
3840

41+
# proxy exclusion support
42+
self.has_proxy = bool(self.web_config.get("http_proxy", ""))
43+
proxy_exclusions = self.web_config.get("http_proxy_exclude", [])
44+
self.noproxy_web_clients = {}
45+
self.proxy_bypass_all = False
46+
if self.has_proxy and proxy_exclusions:
47+
normalized = []
48+
for pattern in proxy_exclusions:
49+
pattern = str(pattern).strip()
50+
if pattern == "*":
51+
self.proxy_bypass_all = True
52+
break
53+
# normalize NO_PROXY conventions for radixtarget
54+
# ".example.com" and "*.example.com" both mean "example.com + subdomains"
55+
if pattern.startswith("*."):
56+
pattern = pattern[2:]
57+
elif pattern.startswith("."):
58+
pattern = pattern[1:]
59+
if pattern:
60+
normalized.append(pattern)
61+
self.proxy_exclusion_target = RadixTarget(*normalized) if normalized else RadixTarget()
62+
self.noproxy_web_client = self._AsyncClient_noproxy(persist_cookies=False)
63+
else:
64+
self.proxy_exclusion_target = RadixTarget()
65+
self.noproxy_web_client = None
66+
3967
def AsyncClient(self, *args, **kwargs):
4068
# cache by retries to prevent unwanted accumulation of clients
4169
# (they are not garbage-collected)
@@ -49,12 +77,44 @@ def AsyncClient(self, *args, **kwargs):
4977
self.web_clients[client.retries] = client
5078
return client
5179

80+
def _AsyncClient_noproxy(self, *args, **kwargs):
    """Create/cache a BBOTAsyncClient with proxy disabled, for excluded hosts.

    Mirrors AsyncClient()'s cache-by-retries pattern so no-proxy clients are
    not accumulated (they are not garbage-collected).
    """
    retries = kwargs.get("retries", 1)
    try:
        return self.noproxy_web_clients[retries]
    except KeyError:
        from .client import BBOTAsyncClient

        # clone the config with the proxy stripped out
        noproxy_config = dict(self.config)
        noproxy_web = dict(noproxy_config.get("web", {}))
        noproxy_web["http_proxy"] = None
        noproxy_config["web"] = noproxy_web
        client = BBOTAsyncClient.from_config(noproxy_config, self.target, *args, **kwargs)
        # Cache under BOTH the requested key and the client's effective retry
        # count: from_config may resolve a different retries value (e.g. from
        # web.http_retries), and caching only under client.retries would make
        # every future lookup with the default key miss — creating (and
        # leaking) a fresh client on every call.
        self.noproxy_web_clients[retries] = client
        self.noproxy_web_clients[client.retries] = client
        return client
95+
96+
def _get_client_for_url(self, url, client=None):
97+
"""Return the appropriate client based on proxy exclusion rules.
98+
99+
If no explicit client is provided and the URL matches an exclusion pattern,
100+
returns the no-proxy client. Otherwise returns the given client or default.
101+
"""
102+
if client is not None:
103+
return client
104+
if self.noproxy_web_client is not None and url:
105+
if self.proxy_bypass_all:
106+
return self.noproxy_web_client
107+
hostname = urlparse(str(url)).hostname
108+
if hostname and self.proxy_exclusion_target.get(hostname):
109+
return self.noproxy_web_client
110+
return self.web_client
111+
52112
async def request(self, *args, **kwargs):
53113
raise_error = kwargs.pop("raise_error", False)
54114
# TODO: use this
55115
cache_for = kwargs.pop("cache_for", None) # noqa
56116

57-
client = kwargs.get("client", self.web_client)
117+
explicit_client = kwargs.pop("client", None)
58118

59119
# allow vs follow, httpx why??
60120
allow_redirects = kwargs.pop("allow_redirects", None)
@@ -79,6 +139,8 @@ async def request(self, *args, **kwargs):
79139

80140
if client_kwargs:
81141
client = self.AsyncClient(**client_kwargs)
142+
else:
143+
client = self._get_client_for_url(url, explicit_client)
82144

83145
try:
84146
async with self._acatch(url, raise_error):
@@ -144,7 +206,8 @@ async def stream_request(self, url, **kwargs):
144206
chunk_size = 8192
145207
chunks = []
146208

147-
async with self._acatch(url, raise_error=True), self.web_client.stream(url=url, **kwargs) as response:
209+
stream_client = self._get_client_for_url(url)
210+
async with self._acatch(url, raise_error=True), stream_client.stream(url=url, **kwargs) as response:
148211
agen = response.aiter_bytes(chunk_size=chunk_size)
149212
async for chunk in agen:
150213
_chunk_size = len(chunk)

bbot/defaults.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ dns:
8484
web:
8585
# HTTP proxy
8686
http_proxy:
87+
# Hosts/CIDRs to exclude from HTTP proxy (NO_PROXY equivalent)
88+
# Examples: ["localhost", "*.internal.corp", "10.0.0.0/8", "elastic.mycompany.com"]
89+
http_proxy_exclude: []
8790
# Web user-agent
8891
user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97
8992
# Suffix to append to user-agent (e.g. for tracking or identification)

bbot/scanner/preset/args.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ def preset_from_args(self):
177177
if self.parsed.proxy:
178178
args_preset.core.merge_custom({"web": {"http_proxy": self.parsed.proxy}})
179179

180+
if self.parsed.no_proxy:
181+
args_preset.core.merge_custom({"web": {"http_proxy_exclude": self.parsed.no_proxy}})
182+
180183
if self.parsed.custom_headers:
181184
args_preset.core.merge_custom({"web": {"http_headers": self.parsed.custom_headers}})
182185

@@ -372,6 +375,13 @@ def create_parser(self, *args, **kwargs):
372375
misc = p.add_argument_group(title="Misc")
373376
misc.add_argument("--version", action="store_true", help="show BBOT version and exit")
374377
misc.add_argument("--proxy", help="Use this proxy for all HTTP requests", metavar="HTTP_PROXY")
378+
misc.add_argument(
379+
"--no-proxy",
380+
nargs="+",
381+
default=[],
382+
help="Exclude these hosts from proxy (e.g. localhost *.internal.corp 10.0.0.0/8)",
383+
metavar="HOST",
384+
)
375385
misc.add_argument(
376386
"-H",
377387
"--custom-headers",

bbot/scanner/preset/environ.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ def prepare(self):
125125
environ.pop("HTTP_PROXY", None)
126126
environ.pop("HTTPS_PROXY", None)
127127

128+
# handle proxy exclusions (NO_PROXY)
129+
http_proxy_exclude = self.preset.config.get("web", {}).get("http_proxy_exclude", [])
130+
if http_proxy_exclude:
131+
environ["NO_PROXY"] = ",".join(str(x) for x in http_proxy_exclude)
132+
else:
133+
environ.pop("NO_PROXY", None)
134+
128135
# ssl verification
129136
import urllib3
130137

bbot/scanner/scanner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ def __init__(
232232
max_redirects = web_config.get("http_max_redirects", 5)
233233
self.web_max_redirects = max(max_redirects, self.web_spider_distance)
234234
self.http_proxy = web_config.get("http_proxy", "")
235+
self.http_proxy_exclude = web_config.get("http_proxy_exclude", [])
235236
self.http_timeout = web_config.get("http_timeout", 10)
236237
self.httpx_timeout = web_config.get("httpx_timeout", 5)
237238
self.http_retries = web_config.get("http_retries", 1)

bbot/test/test_step_1/test_web.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,70 @@ async def test_http_proxy(bbot_scanner, bbot_httpserver, proxy_server):
425425
await scan._cleanup()
426426

427427

428+
@pytest.mark.asyncio
async def test_http_proxy_exclude(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that requests to excluded hosts bypass the proxy."""
    endpoint = "/test_http_proxy_exclude"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("proxy_exclude_works")

    # Exclude 127.0.0.1 from proxy
    proxy_address = f"http://127.0.0.1:{proxy_server.server_address[1]}"
    web_config = {"http_proxy": proxy_address, "http_proxy_exclude": ["127.0.0.1"]}
    scan = bbot_scanner("127.0.0.1", config={"web": web_config})

    await scan._prep()

    # track requests seen by the proxy so we can prove it was bypassed
    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # Request should NOT go through proxy
    assert len(proxy_server.RequestHandlerClass.urls) == 0, "Request should have bypassed proxy but went through it"
    assert response.status_code == 200 and response.text == "proxy_exclude_works"

    await scan._cleanup()
457+
458+
459+
@pytest.mark.asyncio
async def test_http_proxy_exclude_passthrough(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that non-excluded hosts still go through the proxy."""
    endpoint = "/test_proxy_passthrough"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("passthrough_works")

    # Exclude a different host, not the one we're requesting
    proxy_address = f"http://127.0.0.1:{proxy_server.server_address[1]}"
    web_config = {"http_proxy": proxy_address, "http_proxy_exclude": ["10.0.0.0/8"]}
    scan = bbot_scanner("127.0.0.1", config={"web": web_config})

    await scan._prep()

    # track requests seen by the proxy so we can prove it was used
    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # Request SHOULD go through proxy (127.0.0.1 not in exclusion list)
    assert len(proxy_server.RequestHandlerClass.urls) == 1, (
        f"Request to {url} should have gone through proxy but didn't"
    )
    assert response.status_code == 200 and response.text == "passthrough_works"

    await scan._cleanup()
490+
491+
428492
@pytest.mark.asyncio
429493
async def test_http_ssl(bbot_scanner, bbot_httpserver_ssl):
430494
endpoint = "/test_http_ssl"

0 commit comments

Comments
 (0)