2020log = logging .getLogger ("bbot.core.helpers.web" )
2121
2222
async def iter_batch_results(stream):
    """
    Yield individual ``BatchResult`` objects from a ``request_batch_stream`` iterator.

    The native blasthttp 0.4.0 iterator yields lists of ``BatchResult`` (drained in
    chunks of 1000 / 200ms to amortize the Python↔Rust boundary). A future Python
    wrapper is expected to unwrap these into individual items. This helper handles
    both shapes so callers can write a single ``async for`` loop.

    When iteration ends for any reason (exhaustion, the consumer closing this
    generator early, or an error raised by the stream), the underlying stream's
    ``aclose()`` is awaited if it provides one, so it is not left open until
    garbage collection.

    Args:
        stream: Async iterable yielding either single results or lists of results.

    Yields:
        One result at a time, in the order the stream produced them. Empty
        lists contribute nothing.
    """
    try:
        async for item in stream:
            if isinstance(item, list):
                # Chunked shape: flatten the batch into individual results.
                for result in item:
                    yield result
            else:
                # Already-unwrapped shape: pass the single result through.
                yield item
    finally:
        # ``async for`` never closes the iterator it consumes, so release the
        # stream explicitly. Streams without ``aclose`` are left untouched.
        closer = getattr(stream, "aclose", None)
        if callable(closer):
            await closer()
38+
39+
2340class WebHelper :
2441 """
2542 Main utility class for managing HTTP operations in BBOT. Uses blasthttp (Rust) as the
@@ -297,23 +314,26 @@ async def request(self, *args, **kwargs):
297314 log .trace (traceback .format_exc ())
298315 raise
299316
300- async def request_batch (self , urls , threads = 10 , ** kwargs ):
317+ async def request_batch_stream (self , urls , threads = 10 , ** kwargs ):
301318 """
302- Request multiple URLs in parallel via blasthttp's native Rust batch engine.
319+ Request multiple URLs in parallel via blasthttp's native Rust batch engine,
320+ yielding each response as soon as it completes (completion order, not input
321+ order).
303322
304323 Applies the same header/cookie/proxy/timeout logic as ``request()`` — each
305- entry is translated into a ``blasthttp.BatchConfig`` and sent to Rust in one
306- shot. Results are returned as a list (not streamed).
324+ entry is translated into a ``blasthttp.BatchConfig`` and dispatched through
325+ ``blasthttp.request_batch_stream``. A slow request no longer blocks faster
326+ peers behind it, and Python work overlaps with in-flight HTTP I/O.
307327
308328 Each entry in ``urls`` can be:
309329 - A plain URL string (uses shared ``**kwargs`` for all requests)
310330 - A ``(url, per_request_kwargs)`` tuple for per-request options
311331 - A ``(url, per_request_kwargs, tracker)`` tuple to attach arbitrary
312- tracking data that is returned alongside the response
332+ tracking data that is yielded alongside the response
313333
314- Returns :
315- When entries are plain strings: ``list[ (url, response)] ``
316- When any entry includes a tracker: ``list[ (url, response, tracker)] ``
334+ Yields :
335+ When entries are plain strings: ``(url, response)``
336+ When any entry includes a tracker: ``(url, response, tracker)``
317337
318338 Args:
319339 urls: URLs to visit — strings or ``(url, kwargs[, tracker])`` tuples.
@@ -324,15 +344,13 @@ async def request_batch(self, urls, threads=10, **kwargs):
324344 Examples:
325345 Simple (shared kwargs)::
326346
327- results = await self.helpers.request_batch(urls, headers={"X-Test": "Test"})
328- for url, response in results:
347+ async for url, response in self.helpers.request_batch_stream(urls, headers={"X-Test": "Test"}):
329348 ...
330349
331350 Per-request kwargs with tracker::
332351
333352 reqs = [("http://example.com", {"method": "POST"}, "my-tracker")]
334- results = await self.helpers.request_batch(reqs)
335- for url, response, tracker in results:
353+ async for url, response, tracker in self.helpers.request_batch_stream(reqs):
336354 ...
337355 """
338356 import blasthttp
@@ -354,33 +372,33 @@ async def request_batch(self, urls, threads=10, **kwargs):
354372 entries .append ((str (entry ), kwargs , None ))
355373
356374 if not entries :
357- return []
375+ return
376+
377+ # Build BatchConfig objects using the same logic as request().
378+ # Map each config URL back to a queue of trackers so we can correlate
379+ # completion-order results to original entries even when multiple entries
380+ # share a URL.
381+ from collections import deque
358382
359- # Build BatchConfig objects using the same logic as request()
360383 configs = []
361- trackers = []
384+ trackers_by_url = {}
362385 for url , req_kwargs , tracker in entries :
363386 url , method , blast_kwargs = self ._build_blasthttp_kwargs (url , ** req_kwargs )
364387 config = blasthttp .BatchConfig (url , ** blast_kwargs )
365388 configs .append (config )
366- trackers .append (tracker )
367-
368- # Send to Rust — all I/O happens here
369- batch_results = await self .client .request_batch (configs , concurrency = threads )
389+ trackers_by_url .setdefault (config .url , deque ()).append (tracker )
370390
371- # Convert to (url, response[, tracker]) tuples
372- # Results are returned in the same order as configs
373- results = []
374- for i , br in enumerate (batch_results ):
391+ async for br in iter_batch_results (self .client .request_batch_stream (configs , concurrency = threads )):
375392 if br .response is not None :
376393 response = BlasthttpResponse (br .response , request_url = br .url , method = "GET" )
377394 else :
378395 response = None
379396 if has_tracker :
380- results .append ((br .url , response , trackers [i ]))
397+ queue = trackers_by_url .get (br .url )
398+ tracker = queue .popleft () if queue else None
399+ yield br .url , response , tracker
381400 else :
382- results .append ((br .url , response ))
383- return results
401+ yield br .url , response
384402
385403 async def download (self , url , ** kwargs ):
386404 """
0 commit comments