@@ -175,10 +175,75 @@ def _response_to_json(self, url_input, response):
175175
176176 return j
177177
async def _process_result(self, result, parent_event):
    """Emit URL and HTTP_RESPONSE events for a single batch result.

    Args:
        result: One item from the blasthttp batch stream. The attributes read
            here are ``success``, ``error``, ``url`` (the URL that was
            requested) and ``response`` (which exposes ``status`` and ``url``).
        parent_event: The event that caused ``result.url`` to be requested.

    Returns:
        bool: ``False`` when the request failed or produced no status code
        (status ``0``); ``True`` otherwise, including the case where a 404
        from an unverified URL is deliberately discarded.
    """
    if not result.success:
        self.debug(f"blasthttp error for {result.url}: {result.error}")
        return False

    response = result.response
    status_code = response.status
    if status_code == 0:
        self.debug(f'No HTTP status code for "{result.url}"')
        return False

    url = response.url

    # The "input" field represents the original scan target (host:port),
    # not the full URL. Other modules and output consumers use this to
    # correlate responses back to the target that produced them.
    url_input = urlparse(result.url).netloc or result.url
    j = self._response_to_json(url_input, response)

    # Discard 404s from unverified URLs (the root path is always kept).
    path = j.get("path", "/")
    if parent_event.type == "URL_UNVERIFIED" and status_code == 404 and path != "/":
        self.debug(f'Discarding 404 from "{url}"')
        return True

    tags = [f"status-{status_code}"]
    url_context = "{module} visited {event.parent.data} and got status code {event.http_status}"
    if parent_event.type == "OPEN_TCP_PORT":
        url_context += " at {event.data}"

    url_event = self.make_event(url, "URL", parent_event, tags=tags, context=url_context)
    if url_event:
        response_ip = j.get("host", "")
        if response_ip:
            url_event._resolved_hosts.add(response_ip)
        title = j.get("title", "")
        if title:
            url_event.http_title = title
        location = j.get("location", "")
        if location:
            url_event.redirect_location = location
        # Don't re-emit the URL when it is the very event that triggered the request.
        if url_event != parent_event:
            await self.emit_event(url_event)
        content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0]
        content_length = self.helpers.bytes_to_human(j.get("content_length", 0))
        await self.emit_event(
            j,
            "HTTP_RESPONSE",
            url_event,
            tags=url_event.tags,
            context=f"HTTP_RESPONSE was {content_length} with {content_type} content type",
        )

    if self.store_responses:
        response_dir = self.scan.home / "http_responses"
        self.helpers.mkdir(response_dir)
        parsed_url = urlparse(url)
        # Pick the default port from the scheme so a plain-HTTP response is not
        # filed under 443 (where it could overwrite the HTTPS response for the
        # same host and path).
        default_port = 443 if parsed_url.scheme == "https" else 80
        port = parsed_url.port or default_port
        # Fall back to the URL's hostname instead of raising KeyError when the
        # response JSON carries no "host" entry.
        host = j.get("host") or parsed_url.hostname or "unknown"
        filename = f"{host}.{port}{path.replace('/', '[slash]')}.txt"
        response_file = response_dir / filename
        # Explicit UTF-8 keeps the output independent of the process locale;
        # unencodable characters are replaced rather than aborting the write.
        response_file.write_text(
            j.get("raw_header", "") + j.get("body", ""),
            encoding="utf-8",
            errors="replace",
        )
    return True
240+
178241 async def handle_batch (self , * events ):
179242 stdin = {}
180243 # Track dual-scheme probes from OPEN_TCP_PORT: {(host, port): {"http": url, "https": url}}
181244 port_probes = {}
245+ # Reverse index: each paired probe URL → its (host, port) key
246+ paired_probe_urls = {}
182247
183248 for event in events :
184249 urls , url_hash = self .make_url_metadata (event )
@@ -191,6 +256,13 @@ async def handle_batch(self, *events):
191256 scheme = "https" if url .startswith ("https://" ) else "http"
192257 port_probes [key ][scheme ] = url
193258
259+ # Only ports with BOTH schemes are subject to suppression — single-scheme
260+ # OPEN_TCP_PORT probes (rare, but possible) stream through normally.
261+ for key , schemes in port_probes .items ():
262+ if "http" in schemes and "https" in schemes :
263+ paired_probe_urls [schemes ["http" ]] = key
264+ paired_probe_urls [schemes ["https" ]] = key
265+
194266 if not stdin :
195267 return
196268
@@ -199,7 +271,6 @@ async def handle_batch(self, *events):
199271 timeout = self .scan .blasthttp_timeout
200272 retries = self .scan .blasthttp_retries
201273
202- # Build batch configs
203274 configs = []
204275 for url in stdin :
205276 config = blasthttp .BatchConfig (
@@ -213,114 +284,51 @@ async def handle_batch(self, *events):
213284 )
214285 configs .append (config )
215286
216- # Drain the streaming batch into a list — we need every result in hand
217- # before we can decide http/https suppression for paired OPEN_TCP_PORT probes.
218- # Python conversion still overlaps with in-flight HTTP I/O via the stream.
219- results = []
220- async for r in iter_batch_results (self .client .request_batch_stream (configs , concurrency = self .threads )):
221- results .append (r )
222-
223- # For OPEN_TCP_PORT probes, suppress redundant https when http already succeeded.
224- # When probing an unknown port, we try both http:// and https://. If http works,
225- # the port definitely speaks HTTP — the https result may be a proxy artifact
226- # (intercepting proxies like Burp terminate TLS themselves, making any https://
227- # URL "succeed" regardless of whether the target actually speaks TLS).
228- # If http fails but https succeeds, the port genuinely speaks TLS.
229- # Explicit URLs (URL_UNVERIFIED/URL) are never suppressed — this only applies
230- # to speculative OPEN_TCP_PORT probes.
231- suppressed_urls = set ()
232- if port_probes :
233- successful_urls = {r .url for r in results if r .success and r .response .status != 0 }
234- for key , schemes in port_probes .items ():
235- http_url = schemes .get ("http" )
236- https_url = schemes .get ("https" )
237- if not (http_url and https_url ):
287+ # Suppress redundant https probes when http already succeeded for the same
288+ # (host, port). When probing an unknown port, we try both schemes; if http
289+ # works, the port definitely speaks HTTP, and the https result is likely a
290+ # proxy artifact (intercepting proxies like Burp terminate TLS themselves,
291+ # making any https:// URL "succeed" regardless of whether the target really
292+ # speaks TLS). Explicit URL/URL_UNVERIFIED events are never suppressed —
293+ # only speculative OPEN_TCP_PORT probes.
294+ #
295+ # Streaming requires per-pair coordination: emit http immediately, defer
296+ # https until http's outcome is known (or the stream ends).
297+ http_succeeded = {} # key -> bool, set when http result arrives
298+ deferred_https = {} # key -> result, awaiting http verdict
299+
300+ async def resolve_https (key , result ):
301+ if http_succeeded .get (key ) and result .success and result .response .status != 0 :
302+ self .debug (f"Suppressing https probe { result .url } (http already succeeded for { key } )" )
303+ return
304+ await self ._process_result (result , stdin [result .url ])
305+
306+ async for result in iter_batch_results (self .client .request_batch_stream (configs , concurrency = self .threads )):
307+ key = paired_probe_urls .get (result .url )
308+ if key is None :
309+ # Non-paired URL — emit immediately
310+ parent_event = stdin .get (result .url )
311+ if parent_event is None :
312+ self .warning (f"Unable to correlate parent event for: { result .url } " )
238313 continue
239- if http_url in successful_urls and https_url in successful_urls :
240- self .debug (f"Suppressing https probe { https_url } (http already succeeded: { http_url } )" )
241- suppressed_urls .add (https_url )
242-
243- for i in range (len (results )):
244- result = results [i ]
245- results [i ] = None # free response body memory as we go
246- if not result .success :
247- self .debug (f"blasthttp error for { result .url } : { result .error } " )
248- continue
249-
250- response = result .response
251- status_code = response .status
252- if status_code == 0 :
253- self .debug (f'No HTTP status code for "{ result .url } "' )
254- continue
255-
256- if result .url in suppressed_urls :
314+ await self ._process_result (result , parent_event )
257315 continue
258316
259- # Map back to parent event using the input URL
260- parent_event = stdin .get (result .url , None )
261-
262- if parent_event is None :
263- self .warning (f"Unable to correlate parent event for: { result .url } " )
264- continue
265-
266- url = response .url
267-
268- # Build JSON dict for HTTP_RESPONSE event
269- # The "input" field represents the original scan target (host:port),
270- # not the full URL. Other modules and output consumers use this to
271- # correlate responses back to the target that produced them.
272- input_parsed = urlparse (result .url )
273- url_input = input_parsed .netloc or result .url
274- j = self ._response_to_json (url_input , response )
275-
276- # discard 404s from unverified URLs
277- path = j .get ("path" , "/" )
278- if parent_event .type == "URL_UNVERIFIED" and status_code in (404 ,) and path != "/" :
279- self .debug (f'Discarding 404 from "{ url } "' )
280- continue
281-
282- # main URL
283- tags = [f"status-{ status_code } " ]
284-
285- url_context = "{module} visited {event.parent.data} and got status code {event.http_status}"
286- if parent_event .type == "OPEN_TCP_PORT" :
287- url_context += " at {event.data}"
288-
289- url_event = self .make_event (
290- url ,
291- "URL" ,
292- parent_event ,
293- tags = tags ,
294- context = url_context ,
295- )
296- if url_event :
297- response_ip = j .get ("host" , "" )
298- if response_ip :
299- url_event ._resolved_hosts .add (response_ip )
300- title = j .get ("title" , "" )
301- if title :
302- url_event .http_title = title
303- location = j .get ("location" , "" )
304- if location :
305- url_event .redirect_location = location
306- if url_event != parent_event :
307- await self .emit_event (url_event )
308- # HTTP response
309- content_type = j .get ("header" , {}).get ("content_type" , "unspecified" ).split (";" )[0 ]
310- content_length = j .get ("content_length" , 0 )
311- content_length = self .helpers .bytes_to_human (content_length )
312- await self .emit_event (
313- j ,
314- "HTTP_RESPONSE" ,
315- url_event ,
316- tags = url_event .tags ,
317- context = f"HTTP_RESPONSE was { content_length } with { content_type } content type" ,
318- )
319-
320- # Store responses if configured
321- if self .store_responses :
322- response_dir = self .scan .home / "http_responses"
323- self .helpers .mkdir (response_dir )
324- filename = f"{ j ['host' ]} .{ urlparse (url ).port or 443 } { path .replace ('/' , '[slash]' )} .txt"
325- response_file = response_dir / filename
326- response_file .write_text (j .get ("raw_header" , "" ) + j .get ("body" , "" ))
317+ # Paired OPEN_TCP_PORT probe
318+ is_http = result .url == port_probes [key ]["http" ]
319+ if is_http :
320+ http_succeeded [key ] = result .success and result .response is not None and result .response .status != 0
321+ await self ._process_result (result , stdin [result .url ])
322+ # If https for this key arrived first and was buffered, resolve it now
323+ pending = deferred_https .pop (key , None )
324+ if pending is not None :
325+ await resolve_https (key , pending )
326+ else : # is https
327+ if key in http_succeeded :
328+ await resolve_https (key , result )
329+ else :
330+ deferred_https [key ] = result
331+
332+ # Stream ended — any leftover https had no http result, so emit unconditionally
333+ for key , result in deferred_https .items ():
334+ await self ._process_result (result , stdin [result .url ])
0 commit comments