Skip to content

Commit 5396147

Browse files
http.py proper streaming
1 parent 77f07d9 commit 5396147

2 files changed

Lines changed: 124 additions & 120 deletions

File tree

bbot/modules/http.py

Lines changed: 117 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,75 @@ def _response_to_json(self, url_input, response):
175175

176176
return j
177177

178+
async def _process_result(self, result, parent_event):
    """Emit URL + HTTP_RESPONSE events for one batch result.

    Parameters:
        result: a blasthttp batch result (has .success, .url, .error, .response).
        parent_event: the scan event that produced this probe; used for event
            parentage and for type-specific filtering (URL_UNVERIFIED / OPEN_TCP_PORT).

    Returns:
        True if the response carried a usable HTTP status (even if the event was
        deliberately discarded, e.g. a 404 from an unverified URL); False when the
        request failed or no status code was obtained.
    """
    if not result.success:
        self.debug(f"blasthttp error for {result.url}: {result.error}")
        return False

    response = result.response
    status_code = response.status
    # status 0 means the transport succeeded but no HTTP status was parsed
    if status_code == 0:
        self.debug(f'No HTTP status code for "{result.url}"')
        return False

    url = response.url

    # The "input" field represents the original scan target (host:port),
    # not the full URL. Other modules and output consumers use this to
    # correlate responses back to the target that produced them.
    input_parsed = urlparse(result.url)
    url_input = input_parsed.netloc or result.url
    j = self._response_to_json(url_input, response)

    # discard 404s from unverified URLs (root path 404s are still interesting)
    path = j.get("path", "/")
    if parent_event.type == "URL_UNVERIFIED" and status_code == 404 and path != "/":
        self.debug(f'Discarding 404 from "{url}"')
        return True

    tags = [f"status-{status_code}"]
    url_context = "{module} visited {event.parent.data} and got status code {event.http_status}"
    if parent_event.type == "OPEN_TCP_PORT":
        url_context += " at {event.data}"

    url_event = self.make_event(url, "URL", parent_event, tags=tags, context=url_context)
    if url_event:
        response_ip = j.get("host", "")
        if response_ip:
            url_event._resolved_hosts.add(response_ip)
        title = j.get("title", "")
        if title:
            url_event.http_title = title
        location = j.get("location", "")
        if location:
            url_event.redirect_location = location
        # avoid re-emitting the event we were handed (URL -> same URL)
        if url_event != parent_event:
            await self.emit_event(url_event)
        # strip any "; charset=..." suffix from the content type
        content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0]
        content_length = self.helpers.bytes_to_human(j.get("content_length", 0))
        await self.emit_event(
            j,
            "HTTP_RESPONSE",
            url_event,
            tags=url_event.tags,
            context=f"HTTP_RESPONSE was {content_length} with {content_type} content type",
        )

        if self.store_responses:
            response_dir = self.scan.home / "http_responses"
            self.helpers.mkdir(response_dir)
            # FIX: the default port must follow the URL scheme — previously this
            # was unconditionally `or 443`, which misfiled plain http:// responses
            # on the default port under ".443" instead of ".80".
            parsed_url = urlparse(url)
            default_port = 80 if parsed_url.scheme == "http" else 443
            filename = f"{j['host']}.{parsed_url.port or default_port}{path.replace('/', '[slash]')}.txt"
            response_file = response_dir / filename
            response_file.write_text(j.get("raw_header", "") + j.get("body", ""))
    return True
240+
178241
async def handle_batch(self, *events):
179242
stdin = {}
180243
# Track dual-scheme probes from OPEN_TCP_PORT: {(host, port): {"http": url, "https": url}}
181244
port_probes = {}
245+
# Reverse index: each paired probe URL → its (host, port) key
246+
paired_probe_urls = {}
182247

183248
for event in events:
184249
urls, url_hash = self.make_url_metadata(event)
@@ -191,6 +256,13 @@ async def handle_batch(self, *events):
191256
scheme = "https" if url.startswith("https://") else "http"
192257
port_probes[key][scheme] = url
193258

259+
# Only ports with BOTH schemes are subject to suppression — single-scheme
260+
# OPEN_TCP_PORT probes (rare, but possible) stream through normally.
261+
for key, schemes in port_probes.items():
262+
if "http" in schemes and "https" in schemes:
263+
paired_probe_urls[schemes["http"]] = key
264+
paired_probe_urls[schemes["https"]] = key
265+
194266
if not stdin:
195267
return
196268

@@ -199,7 +271,6 @@ async def handle_batch(self, *events):
199271
timeout = self.scan.blasthttp_timeout
200272
retries = self.scan.blasthttp_retries
201273

202-
# Build batch configs
203274
configs = []
204275
for url in stdin:
205276
config = blasthttp.BatchConfig(
@@ -213,114 +284,51 @@ async def handle_batch(self, *events):
213284
)
214285
configs.append(config)
215286

216-
# Drain the streaming batch into a list — we need every result in hand
217-
# before we can decide http/https suppression for paired OPEN_TCP_PORT probes.
218-
# Python conversion still overlaps with in-flight HTTP I/O via the stream.
219-
results = []
220-
async for r in iter_batch_results(self.client.request_batch_stream(configs, concurrency=self.threads)):
221-
results.append(r)
222-
223-
# For OPEN_TCP_PORT probes, suppress redundant https when http already succeeded.
224-
# When probing an unknown port, we try both http:// and https://. If http works,
225-
# the port definitely speaks HTTP — the https result may be a proxy artifact
226-
# (intercepting proxies like Burp terminate TLS themselves, making any https://
227-
# URL "succeed" regardless of whether the target actually speaks TLS).
228-
# If http fails but https succeeds, the port genuinely speaks TLS.
229-
# Explicit URLs (URL_UNVERIFIED/URL) are never suppressed — this only applies
230-
# to speculative OPEN_TCP_PORT probes.
231-
suppressed_urls = set()
232-
if port_probes:
233-
successful_urls = {r.url for r in results if r.success and r.response.status != 0}
234-
for key, schemes in port_probes.items():
235-
http_url = schemes.get("http")
236-
https_url = schemes.get("https")
237-
if not (http_url and https_url):
287+
# Suppress redundant https probes when http already succeeded for the same
288+
# (host, port). When probing an unknown port, we try both schemes; if http
289+
# works, the port definitely speaks HTTP, and the https result is likely a
290+
# proxy artifact (intercepting proxies like Burp terminate TLS themselves,
291+
# making any https:// URL "succeed" regardless of whether the target really
292+
# speaks TLS). Explicit URL/URL_UNVERIFIED events are never suppressed —
293+
# only speculative OPEN_TCP_PORT probes.
294+
#
295+
# Streaming requires per-pair coordination: emit http immediately, defer
296+
# https until http's outcome is known (or the stream ends).
297+
http_succeeded = {} # key -> bool, set when http result arrives
298+
deferred_https = {} # key -> result, awaiting http verdict
299+
300+
async def resolve_https(key, result):
301+
if http_succeeded.get(key) and result.success and result.response.status != 0:
302+
self.debug(f"Suppressing https probe {result.url} (http already succeeded for {key})")
303+
return
304+
await self._process_result(result, stdin[result.url])
305+
306+
async for result in iter_batch_results(self.client.request_batch_stream(configs, concurrency=self.threads)):
307+
key = paired_probe_urls.get(result.url)
308+
if key is None:
309+
# Non-paired URL — emit immediately
310+
parent_event = stdin.get(result.url)
311+
if parent_event is None:
312+
self.warning(f"Unable to correlate parent event for: {result.url}")
238313
continue
239-
if http_url in successful_urls and https_url in successful_urls:
240-
self.debug(f"Suppressing https probe {https_url} (http already succeeded: {http_url})")
241-
suppressed_urls.add(https_url)
242-
243-
for i in range(len(results)):
244-
result = results[i]
245-
results[i] = None # free response body memory as we go
246-
if not result.success:
247-
self.debug(f"blasthttp error for {result.url}: {result.error}")
248-
continue
249-
250-
response = result.response
251-
status_code = response.status
252-
if status_code == 0:
253-
self.debug(f'No HTTP status code for "{result.url}"')
254-
continue
255-
256-
if result.url in suppressed_urls:
314+
await self._process_result(result, parent_event)
257315
continue
258316

259-
# Map back to parent event using the input URL
260-
parent_event = stdin.get(result.url, None)
261-
262-
if parent_event is None:
263-
self.warning(f"Unable to correlate parent event for: {result.url}")
264-
continue
265-
266-
url = response.url
267-
268-
# Build JSON dict for HTTP_RESPONSE event
269-
# The "input" field represents the original scan target (host:port),
270-
# not the full URL. Other modules and output consumers use this to
271-
# correlate responses back to the target that produced them.
272-
input_parsed = urlparse(result.url)
273-
url_input = input_parsed.netloc or result.url
274-
j = self._response_to_json(url_input, response)
275-
276-
# discard 404s from unverified URLs
277-
path = j.get("path", "/")
278-
if parent_event.type == "URL_UNVERIFIED" and status_code in (404,) and path != "/":
279-
self.debug(f'Discarding 404 from "{url}"')
280-
continue
281-
282-
# main URL
283-
tags = [f"status-{status_code}"]
284-
285-
url_context = "{module} visited {event.parent.data} and got status code {event.http_status}"
286-
if parent_event.type == "OPEN_TCP_PORT":
287-
url_context += " at {event.data}"
288-
289-
url_event = self.make_event(
290-
url,
291-
"URL",
292-
parent_event,
293-
tags=tags,
294-
context=url_context,
295-
)
296-
if url_event:
297-
response_ip = j.get("host", "")
298-
if response_ip:
299-
url_event._resolved_hosts.add(response_ip)
300-
title = j.get("title", "")
301-
if title:
302-
url_event.http_title = title
303-
location = j.get("location", "")
304-
if location:
305-
url_event.redirect_location = location
306-
if url_event != parent_event:
307-
await self.emit_event(url_event)
308-
# HTTP response
309-
content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0]
310-
content_length = j.get("content_length", 0)
311-
content_length = self.helpers.bytes_to_human(content_length)
312-
await self.emit_event(
313-
j,
314-
"HTTP_RESPONSE",
315-
url_event,
316-
tags=url_event.tags,
317-
context=f"HTTP_RESPONSE was {content_length} with {content_type} content type",
318-
)
319-
320-
# Store responses if configured
321-
if self.store_responses:
322-
response_dir = self.scan.home / "http_responses"
323-
self.helpers.mkdir(response_dir)
324-
filename = f"{j['host']}.{urlparse(url).port or 443}{path.replace('/', '[slash]')}.txt"
325-
response_file = response_dir / filename
326-
response_file.write_text(j.get("raw_header", "") + j.get("body", ""))
317+
# Paired OPEN_TCP_PORT probe
318+
is_http = result.url == port_probes[key]["http"]
319+
if is_http:
320+
http_succeeded[key] = result.success and result.response is not None and result.response.status != 0
321+
await self._process_result(result, stdin[result.url])
322+
# If https for this key arrived first and was buffered, resolve it now
323+
pending = deferred_https.pop(key, None)
324+
if pending is not None:
325+
await resolve_https(key, pending)
326+
else: # is https
327+
if key in http_succeeded:
328+
await resolve_https(key, result)
329+
else:
330+
deferred_https[key] = result
331+
332+
# Stream ended — any leftover https had no http result, so emit unconditionally
333+
for key, result in deferred_https.items():
334+
await self._process_result(result, stdin[result.url])

bbot/modules/web_brute.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -316,22 +316,18 @@ async def execute_fuzz(
316316
self.debug(f"Fuzzing {len(configs)} URLs for ext [{ext}]")
317317

318318
# Fire all requests via native blasthttp batch (Rust concurrency).
319-
# Stream results into a URL-keyed dict so we can re-process them in
320-
# wordlist order (canary appended last) below.
321-
results_by_url = {}
319+
# Stream results in completion order — canary detection and hit
320+
# collection are order-independent (we only check `canary_found and
321+
# hits` after the stream completes), so per-result work overlaps with
322+
# in-flight HTTP I/O.
323+
canary_found = False
324+
hits = []
322325
async for result in iter_batch_results(
323326
self.blast_client.request_batch_stream(configs, self.concurrency, rate_limit=self.rate)
324327
):
325-
results_by_url[result.url] = result
326-
327-
# Process in wordlist order so canary (appended last) is checked last
328-
canary_found = False
329-
hits = []
330-
for config in configs:
331328
if self.scan.stopping:
332329
return
333-
result = results_by_url.get(config.url)
334-
if result is None or not result.success:
330+
if not result.success:
335331
continue
336332

337333
response = result.response

0 commit comments

Comments
 (0)