Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a5df5b8
feat: add `isRemote` flag and connect option types for remote browser…
l2ysho Mar 18, 2026
b012525
feat: add PlaywrightPlugin remote connection routing via `connect()` …
l2ysho Mar 18, 2026
0e83812
feat: add PuppeteerPlugin remote connection routing via `puppeteer.co…
l2ysho Mar 18, 2026
29e7aa4
feat: skip proxy/webdriver hiding for remote browsers, add remote con…
l2ysho Mar 19, 2026
ed86761
fix: require endpoint in connect options, use non-deprecated Playwrig…
l2ysho Mar 19, 2026
bd19911
feat: default `useIncognitoPages` to `true` for remote browser connec…
l2ysho Mar 19, 2026
373da36
fix: improve remote connection error handling and endpoint validation
l2ysho Mar 19, 2026
76b7d20
fix: prevent resource leaks in PuppeteerPlugin remote browser connect…
l2ysho Mar 19, 2026
01ada42
chore: add clarifying comments for remote launch path in base class
l2ysho Mar 19, 2026
f7dc7c6
fix: clarify useIncognitoPages pattern and improve warning for WebSoc…
l2ysho Mar 19, 2026
0e3218b
feat: add warnings for ignored options on remote browser connections
l2ysho Mar 19, 2026
a11370a
test: add unit tests for remote browser connections
l2ysho Mar 19, 2026
2740bf4
fix: prevent proxy URL from leaking into remote Puppeteer browser con…
l2ysho Mar 19, 2026
fb73726
docs(examples): add remote browser integration examples
l2ysho Mar 31, 2026
3be257c
Merge branch 'v4' into 1822-connect-to-remote-browser-services
l2ysho Apr 21, 2026
92b971b
Merge branch 'v4' into 1822-connect-to-remote-browser-services
l2ysho Apr 27, 2026
090ae78
feat(browser-pool): add RemoteBrowserProvider abstract class and re…
l2ysho Apr 29, 2026
945ef92
feat(browser-pool): add maxOpenBrowsers to prevent concurrent session…
l2ysho Apr 29, 2026
2f6e9eb
update remote browser
l2ysho Apr 30, 2026
604346a
update examples
l2ysho Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions packages/browser-crawler/src/internals/browser-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ export abstract class BrowserCrawler<
this.browserPool = new BrowserPool<InternalBrowserPoolOptions>({
...(browserPoolOptions as any),
});

// Read maxOpenBrowsers from the remote browser config and apply it to the pool.
const remoteMaxBrowsers = this.browserPool.browserPlugins[0]?.remoteBrowser?.maxOpenBrowsers;
if (remoteMaxBrowsers) {
this.browserPool.maxOpenBrowsers = remoteMaxBrowsers;
}
}

protected override buildContextPipeline(): ContextPipeline<
Expand Down Expand Up @@ -673,6 +679,13 @@ export abstract class BrowserCrawler<
return;
}

// Remote browsers are expensive — don't retire them when a session retires.
// Let retireBrowserAfterPageCount control the browser lifecycle instead.
// See also: PR #3605 which fixes the root cause (maxUsageCount: 1 in BasicCrawler).
if (browserController.launchContext.isRemote) {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a temporary fix for the "one remote browser per URL" problem (discussed in the meeting). It should be properly fixed by #3605.

return;
}

let sessionIds = this.browserSessionIds.get(browserController);

if (sessionIds) {
Expand Down Expand Up @@ -703,6 +716,16 @@ export abstract class BrowserCrawler<
* Function for cleaning up after all requests are processed.
* @ignore
*/
/**
 * Decides whether a new task may be started right now.
 *
 * Returns `false` while the browser pool has neither a free browser slot nor
 * an active browser that can take another page; otherwise defers to the
 * parent implementation.
 * @ignore
 */
protected override async _isTaskReadyFunction(): Promise<boolean> {
    const pool = this.browserPool;

    // A page can be placed either in a browser that is yet to be launched or in an
    // already active one. AutoscaledPool will retry automatically when a browser
    // closes and frees a slot.
    const canPlacePage = pool.hasFreeBrowserSlot() || pool.hasActiveBrowserWithFreeCapacity();
    if (!canPlacePage) {
        return false;
    }

    return super._isTaskReadyFunction();
}

override async teardown(): Promise<void> {
await this.browserPool.destroy();
await super.teardown();
Expand Down
1 change: 1 addition & 0 deletions packages/browser-crawler/src/internals/browser-launcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ export abstract class BrowserLauncher<
userDataDir: ow.optional.string,
launchOptions: ow.optional.object,
userAgent: ow.optional.string,
remoteBrowser: ow.optional.object,
};

static requireLauncherOrThrow<T>(launcher: string, apifyImageName: string): T {
Expand Down
18 changes: 18 additions & 0 deletions packages/browser-pool/src/abstract-classes/browser-controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ export abstract class BrowserController<
this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
}

await this._releaseRemoteBrowser();
this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);

setTimeout(() => {
Expand All @@ -158,9 +159,26 @@ export abstract class BrowserController<
async kill(): Promise<void> {
    // Wait until the browser instance has been assigned before trying to kill it.
    await this.hasBrowserPromise;

    try {
        await this._kill();
    } finally {
        // Release the remote session even when the kill itself failed, otherwise
        // the remote service would keep the session open. The helper is a no-op
        // for browsers without a resolved remote endpoint and fires at most once.
        await this._releaseRemoteBrowser();
    }

    // Only announced after a successful kill, as before.
    this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);
}

/**
 * Hands the remote browser session back through the plugin's release hook.
 *
 * Does nothing when the launch context carries no resolved remote endpoint.
 * The endpoint and its context are wiped from the launch context before the
 * hook is awaited, so repeated calls trigger the release only once.
 */
private async _releaseRemoteBrowser(): Promise<void> {
    const resolvedEndpoint = this.launchContext?._resolvedRemoteEndpoint as string | undefined;

    if (resolvedEndpoint) {
        const releaseContext = this.launchContext._remoteContext as Record<string, unknown> | undefined;

        // Wipe first: close() schedules kill() after a timeout, and both paths end up here.
        this.launchContext.extend({ _resolvedRemoteEndpoint: undefined, _remoteContext: undefined });

        await this.browserPlugin._callRelease(resolvedEndpoint, releaseContext);
    }
}

/**
* Opens new browser page.
* @ignore
Expand Down
138 changes: 136 additions & 2 deletions packages/browser-pool/src/abstract-classes/browser-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import merge from 'lodash.merge';

import type { LaunchContextOptions } from '../launch-context.js';
import { LaunchContext } from '../launch-context.js';
import { RemoteBrowserProvider } from '../remote-browser-provider.js';
import type { UnwrapPromise } from '../utils.js';
import type { BrowserController } from './browser-controller.js';

Expand Down Expand Up @@ -44,6 +45,67 @@ export interface CommonPage {
url(): string | Promise<string>;
}

/**
* Return type for dynamic endpoint functions that need to pass session
* metadata to the `release()` callback.
*
* An endpoint function may also return a plain string; the plugin then wraps
* it into `{ url }` with no `context`.
*/
export interface RemoteBrowserEndpointResult {
/** The browser endpoint URL to connect to. */
url: string;
/**
* Opaque metadata passed back to `release()` — e.g. session IDs, API tokens.
* Optional: `release()` receives `undefined` when it is omitted.
*/
context?: Record<string, unknown>;
}

/**
* Configuration for connecting to a remote browser service.
*
* **Static endpoint (e.g. Browserless):**
* ```typescript
* { endpoint: 'wss://browserless.io?token=xxx' }
* ```
*
* **Dynamic endpoint with lifecycle (e.g. Browserbase):**
* ```typescript
* {
*     endpoint: async () => {
*         const session = await createSession();
*         return { url: session.connectUrl, context: { id: session.id } };
*     },
*     // `context` is optional and its values are `unknown`, so narrow before use.
*     release: async ({ context }) => {
*         if (typeof context?.id === 'string') {
*             await releaseSession(context.id);
*         }
*     },
* }
* ```
*/
export interface RemoteBrowserConfig {
/**
* The browser endpoint URL, or an async function that returns one.
* When a function is provided, it is called once per browser launch (not per page).
*
* Can return a plain string or an object with `url` and optional `context`
* that will be forwarded to `release()`.
*/
endpoint: string | (() => string | RemoteBrowserEndpointResult | Promise<string | RemoteBrowserEndpointResult>);
/**
* Optional cleanup function called when the browser closes, crashes, or the pool is destroyed.
* Receives the resolved endpoint URL and the `context` object returned by `endpoint()`
* (`undefined` when the endpoint did not provide one).
* Errors are caught and logged as warnings — they never crash the crawler.
*/
release?: (info: { endpoint: string; context?: Record<string, unknown> }) => void | Promise<void>;
/**
* Connection type. Subclass interfaces narrow this further
* (e.g. Puppeteer only allows `'cdp'`).
* @default 'cdp'
*/
type?: 'cdp' | 'websocket';
/**
* Maximum number of browsers that can be open at the same time.
* When the limit is reached, the crawler waits for a browser to close before launching a new one.
* Set this to your remote service's concurrent session limit to avoid 429 errors.
*
* Note: `BrowserCrawler` reads this value from the first browser plugin only
* and ignores falsy values (such as `0`).
*/
maxOpenBrowsers?: number;
}

export interface BrowserPluginOptions<LibraryOptions> {
/**
* Options that will be passed down to the automation library. E.g.
Expand Down Expand Up @@ -81,6 +143,15 @@ export interface BrowserPluginOptions<LibraryOptions> {
* This is useful when using HTTPS proxies with self-signed certificates.
*/
ignoreProxyCertificate?: boolean;
/**
* Configuration for connecting to a remote browser service.
* When set, the plugin connects to a remote browser instead of launching a local one.
*
* Accepts either a {@link RemoteBrowserConfig} object or a {@link RemoteBrowserProvider} instance.
*
* Takes precedence over `connectOverCDPOptions` / `connectOptions` if both are set.
*/
remoteBrowser?: RemoteBrowserConfig | RemoteBrowserProvider<any>;
}

export interface CreateLaunchContextOptions<
Expand Down Expand Up @@ -116,6 +187,7 @@ export abstract class BrowserPlugin<
browserPerProxy?: boolean;

ignoreProxyCertificate?: boolean;
remoteBrowser?: RemoteBrowserConfig;

constructor(library: Library, options: BrowserPluginOptions<LibraryOptions> = {}) {
const {
Expand All @@ -125,6 +197,7 @@ export abstract class BrowserPlugin<
useIncognitoPages = false,
browserPerProxy = false,
ignoreProxyCertificate = false,
remoteBrowser,
} = options;

this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });
Expand All @@ -135,6 +208,53 @@ export abstract class BrowserPlugin<
this.useIncognitoPages = useIncognitoPages;
this.browserPerProxy = browserPerProxy;
this.ignoreProxyCertificate = ignoreProxyCertificate;

// Normalize RemoteBrowserProvider instances into a plain RemoteBrowserConfig
// so all downstream code only deals with the config shape.
if (remoteBrowser instanceof RemoteBrowserProvider) {
const provider = remoteBrowser;
this.remoteBrowser = {
endpoint: () => provider.connect(),
release: ({ context }) => provider.release(context as any),
type: provider.type,
maxOpenBrowsers: provider.maxOpenBrowsers,
};
} else {
this.remoteBrowser = remoteBrowser;
}
}

/**
 * Turns the configured `remoteBrowser.endpoint` into a `{ url, context }` pair.
 *
 * A function endpoint is invoked and awaited; a string (whether configured
 * directly or returned by the function) is wrapped as `{ url }`. An object
 * result is passed through untouched.
 */
protected async _resolveRemoteEndpoint(): Promise<RemoteBrowserEndpointResult> {
    const { endpoint: source } = this.remoteBrowser!;

    let resolved: string | RemoteBrowserEndpointResult;
    if (typeof source === 'function') {
        resolved = await source();
    } else {
        resolved = source;
    }

    return typeof resolved === 'string' ? { url: resolved } : resolved;
}

/**
 * Invokes the configured `remoteBrowser.release()` callback, if there is one.
 *
 * The callback is best-effort: any failure is logged as a warning and never
 * rethrown, so a broken cleanup hook cannot interrupt browser shutdown.
 *
 * @param endpoint The resolved endpoint URL the browser was connected to.
 * @param context Optional metadata that accompanied the resolved endpoint.
 * @internal Called by BrowserController on browser close/kill.
 */
async _callRelease(endpoint: string, context?: Record<string, unknown>): Promise<void> {
    try {
        await this.remoteBrowser?.release?.({ endpoint, context });
    } catch (err) {
        // The callback is user code and may reject with a non-Error value
        // (a string, a plain object, ...). Reading `.message` on those yields
        // `undefined`, which would drop the failure reason from the log.
        const reason = err instanceof Error ? err.message : String(err);
        this.log.warning('remoteBrowser.release() failed.', { error: reason });
    }
}

/**
 * Produces a version of a remote endpoint URL that is safe to write to logs.
 *
 * Two places where secrets can appear are masked with `***`:
 * - the userinfo part (`user:password@host`), and
 * - the value of every query-string parameter, because an endpoint may carry
 *   its API token there (e.g. `wss://host?token=...`).
 *
 * Scheme, host, path and parameter names are kept so the log line still
 * identifies the target service.
 *
 * @param endpoint The endpoint URL to sanitize.
 * @returns The masked URL, or `'<invalid URL>'` when `endpoint` cannot be parsed.
 */
protected _sanitizeEndpointForLog(endpoint: string): string {
    try {
        const url = new URL(endpoint);

        if (url.username || url.password) {
            url.username = '***';
            url.password = '***';
        }

        // Snapshot the names first: `set()` mutates the collection being iterated.
        for (const name of Array.from(url.searchParams.keys())) {
            url.searchParams.set(name, '***');
        }

        return url.toString();
    } catch {
        return '<invalid URL>';
    }
}

/**
Expand All @@ -155,6 +275,7 @@ export abstract class BrowserPlugin<
browserPerProxy = this.browserPerProxy,
ignoreProxyCertificate = this.ignoreProxyCertificate,
proxyTier,
isRemote,
} = options;

return new LaunchContext({
Expand All @@ -167,6 +288,7 @@ export abstract class BrowserPlugin<
browserPerProxy,
ignoreProxyCertificate,
proxyTier,
isRemote,
});
}

Expand All @@ -190,15 +312,23 @@ export abstract class BrowserPlugin<
NewPageResult
> = this.createLaunchContext(),
): Promise<LaunchResult> {
// launchOptions is only used by the local launch path below — remote connections ignore it.
launchContext.launchOptions ??= {} as LibraryOptions;

const { proxyUrl, launchOptions } = launchContext;

if (proxyUrl) {
if (proxyUrl && launchContext.isRemote) {
this.log.warning(
'proxyUrl is set but will be ignored for remote browser connections. ' +
'Configure proxy settings on the remote browser service instead.',
);
}

if (proxyUrl && !launchContext.isRemote) {
await this._addProxyToLaunchOptions(launchContext);
}

if (this._isChromiumBasedBrowser(launchContext)) {
if (!launchContext.isRemote && this._isChromiumBasedBrowser(launchContext)) {
// This will set the args for chromium based browsers to hide the webdriver.
(launchOptions as Dictionary).args = this._mergeArgsToHideWebdriver(launchOptions!.args);
// When User-Agent is not set, and we're using Chromium in headless mode,
Expand All @@ -210,6 +340,10 @@ export abstract class BrowserPlugin<
}
}

if (launchContext.isRemote) {
this.log.info('Connecting to remote browser (skipping local proxy and webdriver stealth configuration).');
}

return this._launch(launchContext);
}

Expand Down
24 changes: 24 additions & 0 deletions packages/browser-pool/src/browser-pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ export class BrowserPool<
> extends TypedEmitter<BrowserPoolEvents<BrowserControllerReturn, PageReturn>> {
browserPlugins: BrowserPlugins;
maxOpenPagesPerBrowser: number;
maxOpenBrowsers: number;
retireBrowserAfterPageCount: number;
operationTimeoutMillis: number;
closeInactiveBrowserAfterMillis: number;
Expand Down Expand Up @@ -395,6 +396,7 @@ export class BrowserPool<

this.browserPlugins = browserPlugins as unknown as BrowserPlugins;
this.maxOpenPagesPerBrowser = maxOpenPagesPerBrowser;
this.maxOpenBrowsers = Infinity;
this.retireBrowserAfterPageCount = retireBrowserAfterPageCount;
this.operationTimeoutMillis = operationTimeoutSecs * 1000;
this.closeInactiveBrowserAfterMillis = closeInactiveBrowserAfterSecs * 1000;
Expand Down Expand Up @@ -860,6 +862,28 @@ export class BrowserPool<
}
}

/**
 * Tells whether one more browser may be launched without going over
 * {@link BrowserPool.maxOpenBrowsers}.
 *
 * Browsers that are starting, active, or retired all count towards the limit.
 */
hasFreeBrowserSlot(): boolean {
    const { startingBrowserControllers, activeBrowserControllers, retiredBrowserControllers } = this;

    const openBrowsers = [startingBrowserControllers, activeBrowserControllers, retiredBrowserControllers].reduce(
        (sum, controllers) => sum + controllers.size,
        0,
    );

    return this.maxOpenBrowsers > openBrowsers;
}

/**
 * Tells whether at least one active browser currently has fewer open pages
 * than `maxOpenPagesPerBrowser`.
 */
hasActiveBrowserWithFreeCapacity(): boolean {
    return Array.from(this.activeBrowserControllers).some(
        (browser) => browser.activePages < this.maxOpenPagesPerBrowser,
    );
}

private _initializeFingerprinting(): void {
const { useFingerprintCache = true, fingerprintCacheSize = 10_000 } = this.fingerprintOptions;
this.fingerprintGenerator = new FingerprintGenerator(this.fingerprintOptions.fingerprintGeneratorOptions);
Expand Down
5 changes: 5 additions & 0 deletions packages/browser-pool/src/fingerprinting/hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ export function createFingerprintPreLaunchHook(browserPool: BrowserPool<any, any
} = browserPool;

return (_pageId: string, launchContext: LaunchContext) => {
// Remote browsers may have their own fingerprinting — skip local fingerprint injection
if (launchContext.isRemote) return;

const { useIncognitoPages } = launchContext;
const cacheKey = (launchContext.session as { id: string } | undefined)?.id ?? launchContext.proxyUrl;
const { launchOptions }: { launchOptions: any } = launchContext;
Expand Down Expand Up @@ -62,6 +65,7 @@ export function createFingerprintPreLaunchHook(browserPool: BrowserPool<any, any
export function createPrePageCreateHook() {
return (_pageId: string, browserController: BrowserController, pageOptions: any): void => {
const { launchContext, browserPlugin } = browserController;
if (launchContext.isRemote) return;
const { fingerprint } = launchContext.fingerprint!;

if (launchContext.useIncognitoPages && browserPlugin instanceof PlaywrightPlugin && pageOptions) {
Expand All @@ -80,6 +84,7 @@ export function createPrePageCreateHook() {
export function createPostPageCreateHook(fingerprintInjector: FingerprintInjector) {
return async (page: any, browserController: BrowserController): Promise<void> => {
const { browserPlugin, launchContext } = browserController;
if (launchContext.isRemote) return;
const fingerprint = launchContext.fingerprint!;

// TODO this will require refactoring, we should use common API instead of branching based on plugin type,
Expand Down
3 changes: 3 additions & 0 deletions packages/browser-pool/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@ export type {
CommonLibrary,
BrowserPluginOptions,
CreateLaunchContextOptions,
RemoteBrowserConfig,
RemoteBrowserEndpointResult,
} from './abstract-classes/browser-plugin.js';
export { BrowserPlugin, BrowserLaunchError, DEFAULT_USER_AGENT } from './abstract-classes/browser-plugin.js';
export type { LaunchContextOptions } from './launch-context.js';
export { LaunchContext } from './launch-context.js';
export { RemoteBrowserProvider } from './remote-browser-provider.js';
export type { InferBrowserPluginArray, UnwrapPromise } from './utils.js';
export { anonymizeProxySugar, type AnonymizeProxySugarOptions } from './anonymize-proxy.js';
Loading