@@ -37,7 +37,7 @@ import type {
3737 LaunchContext ,
3838} from '@crawlee/browser-pool' ;
3939import { BROWSER_CONTROLLER_EVENTS , BrowserPool } from '@crawlee/browser-pool' ;
40- import type { BatchAddRequestsResult , Cookie as CookieObject , ProxyInfo } from '@crawlee/types' ;
40+ import type { BatchAddRequestsResult , Cookie as CookieObject } from '@crawlee/types' ;
4141import type { RobotsTxtFile } from '@crawlee/utils' ;
4242import { CLOUDFLARE_RETRY_CSS_SELECTORS , RETRY_CSS_SELECTORS , sleep } from '@crawlee/utils' ;
4343import ow from 'ow' ;
@@ -391,12 +391,8 @@ export abstract class BrowserCrawler<
391391 browserPoolOptions . useFingerprints = false ;
392392 }
393393
394- const { preLaunchHooks = [ ] , postLaunchHooks = [ ] , ...rest } = browserPoolOptions ;
395-
396394 this . browserPool = new BrowserPool < InternalBrowserPoolOptions > ( {
397- ...( rest as any ) ,
398- preLaunchHooks : [ this . _extendLaunchContext . bind ( this ) , ...preLaunchHooks ] ,
399- postLaunchHooks : [ this . _maybeAddSessionRetiredListener . bind ( this ) , ...postLaunchHooks ] ,
395+ ...( browserPoolOptions as any ) ,
400396 } ) ;
401397 }
402398
@@ -469,25 +465,12 @@ export abstract class BrowserCrawler<
469465 id : crawlingContext . id ,
470466 } ;
471467
472- const useIncognitoPages = this . launchContext ?. useIncognitoPages ;
473-
474468 if ( crawlingContext . session ?. proxyInfo ) {
475469 const proxyInfo = crawlingContext . session . proxyInfo ;
476- crawlingContext . proxyInfo = proxyInfo ;
477470
478471 newPageOptions . proxyUrl = proxyInfo ?. url ;
479472 newPageOptions . proxyTier = proxyInfo ?. proxyTier ;
480-
481- if ( proxyInfo ?. ignoreTlsErrors ) {
482- /**
483- * @see https://playwright.dev/docs/api/class-browser/#browser-new-context
484- * @see https://github.com/puppeteer/puppeteer/blob/main/docs/api.md
485- */
486- newPageOptions . pageOptions = {
487- ignoreHTTPSErrors : true ,
488- acceptInsecureCerts : true ,
489- } ;
490- }
473+ newPageOptions . ignoreTlsErrors = proxyInfo ?. ignoreTlsErrors ;
491474 }
492475
493476 const page = ( await this . browserPool . newPage ( newPageOptions ) ) as Page ;
@@ -497,11 +480,9 @@ export abstract class BrowserCrawler<
497480 page as any ,
498481 ) as ProvidedController ;
499482
500- const contextEnqueueLinks = crawlingContext . enqueueLinks ;
483+ this . addSessionRetiredListener ( crawlingContext . session , browserControllerInstance ) ;
501484
502- const session = useIncognitoPages
503- ? crawlingContext . session
504- : ( browserControllerInstance . launchContext . session as Session ) ;
485+ const contextEnqueueLinks = crawlingContext . enqueueLinks ;
505486
506487 return {
507488 page,
@@ -511,8 +492,6 @@ export abstract class BrowserCrawler<
511492 ) ;
512493 } ,
513494 browserController : browserControllerInstance ,
514- session,
515- proxyInfo : session ?. proxyInfo ,
516495 enqueueLinks : async ( enqueueOptions : EnqueueLinksOptions = { } ) => {
517496 return ( await browserCrawlerEnqueueLinks ( {
518497 options : { ...enqueueOptions , limit : this . calculateEnqueuedRequestLimit ( enqueueOptions ?. limit ) } ,
@@ -685,57 +664,37 @@ export abstract class BrowserCrawler<
685664 request . loadedUrl = await page . url ( ) ;
686665 }
687666
688- protected async _extendLaunchContext ( _pageId : string , launchContext : LaunchContext ) : Promise < void > {
689- const launchContextExtends : { session ?: Session ; proxyInfo ?: ProxyInfo } = { } ;
690-
691- // Hacky access from `AdaptivePlaywrightCrawler` calls this without calling `.init()`.
692- // This is the only case where this.sessionPool is accessed without being initialized.
693- if ( this . sessionPool ) {
694- launchContextExtends . session = await this . sessionPool . newSession ( {
695- proxyInfo : await this . proxyConfiguration ?. newProxyInfo ( {
696- // cannot pass a request here, since session is created on browser launch
697- } ) ,
698- } ) ;
699- }
667+ private browserSessionIds = new WeakMap < Context [ 'browserController' ] , Set < string > > ( ) ;
700668
701- if ( ! launchContext . proxyUrl && launchContextExtends . session ?. proxyInfo ) {
702- const proxyInfo = launchContextExtends . session . proxyInfo ;
669+ private addSessionRetiredListener ( session : Session , browserController : Context [ 'browserController' ] ) : void {
670+ if ( ! this . sessionPool ) {
671+ return ;
672+ }
703673
704- launchContext . proxyUrl = proxyInfo ?. url ;
705- launchContextExtends . proxyInfo = proxyInfo ;
674+ let sessionIds = this . browserSessionIds . get ( browserController ) ;
706675
707- // Disable SSL verification for MITM proxies
708- if ( proxyInfo ?. ignoreTlsErrors ) {
709- /**
710- * @see https://playwright.dev/docs/api/class-browser/#browser-new-context
711- * @see https://github.com/puppeteer/puppeteer/blob/main/docs/api.md
712- */
713- ( launchContext . launchOptions as Dictionary ) . ignoreHTTPSErrors = true ;
714- ( launchContext . launchOptions as Dictionary ) . acceptInsecureCerts = true ;
715- }
676+ if ( sessionIds ) {
677+ sessionIds . add ( session . id ) ;
678+ return ;
716679 }
717680
718- launchContext . extend ( launchContextExtends ) ;
719- }
681+ sessionIds = new Set ( [ session . id ] ) ;
682+ this . browserSessionIds . set ( browserController , sessionIds ) ;
720683
721- protected _maybeAddSessionRetiredListener ( _pageId : string , browserController : Context [ 'browserController' ] ) : void {
722- if ( this . sessionPool ) {
723- const listener = ( session : Session ) => {
724- const { launchContext } = browserController ;
725- if ( session . id === ( launchContext . session as Session ) . id ) {
726- this . browserPool . retireBrowserController (
727- browserController as Parameters <
728- BrowserPool < InternalBrowserPoolOptions > [ 'retireBrowserController' ]
729- > [ 0 ] ,
730- ) ;
731- }
732- } ;
684+ const listener = ( retired : Session ) => {
685+ if ( this . browserSessionIds . get ( browserController ) ?. has ( retired . id ) ) {
686+ this . browserPool . retireBrowserController (
687+ browserController as Parameters <
688+ BrowserPool < InternalBrowserPoolOptions > [ 'retireBrowserController' ]
689+ > [ 0 ] ,
690+ ) ;
691+ }
692+ } ;
733693
734- this . sessionPool . on ( EVENT_SESSION_RETIRED , listener ) ;
735- browserController . on ( BROWSER_CONTROLLER_EVENTS . BROWSER_CLOSED , ( ) => {
736- return this . sessionPool ! . removeListener ( EVENT_SESSION_RETIRED , listener ) ;
737- } ) ;
738- }
694+ this . sessionPool . on ( EVENT_SESSION_RETIRED , listener ) ;
695+ browserController . on ( BROWSER_CONTROLLER_EVENTS . BROWSER_CLOSED , ( ) => {
696+ return this . sessionPool ! . removeListener ( EVENT_SESSION_RETIRED , listener ) ;
697+ } ) ;
739698 }
740699
741700 /**
0 commit comments