Skip to content

Commit ab34886

Browse files
moaedynikitachapovskii-devjanbuchar
authored
fix: BrowserCrawler closes ctx.page before errorHandler runs on navig… (#3655)
Co-authored-by: nikitachapovskii-dev <nikita.chapovskii@apify.com> Co-authored-by: Jan Buchar <jan.buchar@apify.com>
1 parent 158e989 commit ab34886

3 files changed

Lines changed: 43 additions & 3 deletions

File tree

packages/browser-crawler/src/internals/browser-crawler.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -686,16 +686,18 @@ export abstract class BrowserCrawler<
686686
}
687687

688688
/**
689-
* Marks session bad in case of navigation timeout.
689+
* Marks session bad on navigation timeout, and stops in-flight page loading on any navigation error.
690690
*/
691691
protected async _handleNavigationTimeout(crawlingContext: Context, error: Error): Promise<void> {
692-
const { session } = crawlingContext;
692+
const { session, page } = crawlingContext;
693693

694694
if (error && error.constructor.name === 'TimeoutError') {
695695
handleRequestTimeout({ session, errorMessage: error.message });
696696
}
697697

698-
await crawlingContext.page.close();
698+
// Fire-and-forget: no user code will run on this page after a failed navigation.
699+
// Swallow rejections: the page may already be detached.
700+
void page.evaluate(() => window.stop()).catch(() => {});
699701
}
700702

701703
/**

packages/browser-pool/src/abstract-classes/browser-plugin.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export interface CommonBrowser {
4242
export interface CommonPage {
4343
close(...args: unknown[]): Promise<unknown>;
4444
url(): string | Promise<string>;
45+
evaluate(pageFunction: ((...args: any[]) => unknown) | string, ...args: unknown[]): Promise<unknown>;
4546
}
4647

4748
export interface BrowserPluginOptions<LibraryOptions> {

test/core/crawlers/browser_crawler.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,43 @@ describe('BrowserCrawler', () => {
291291
}
292292
});
293293

294+
test.concurrent('errorHandler has open page after non-timeout navigation error', async () => {
295+
const localStorageEmulator = new MemoryStorageEmulator();
296+
await localStorageEmulator.init();
297+
const puppeteerPlugin = new PuppeteerPlugin(puppeteer);
298+
299+
try {
300+
const requestList = await RequestList.open({
301+
sources: [{ url: `${serverAddress}/?q=1` }],
302+
});
303+
304+
const pageClosedStates: boolean[] = [];
305+
306+
const browserCrawler = new (class extends BrowserCrawlerTest {
307+
protected override async _navigationHandler(): Promise<HTTPResponse | null | undefined> {
308+
throw new Error('net::ERR_NAME_NOT_RESOLVED');
309+
}
310+
})({
311+
browserPoolOptions: {
312+
browserPlugins: [puppeteerPlugin],
313+
},
314+
requestList,
315+
requestHandler: async () => {},
316+
maxRequestRetries: 1,
317+
errorHandler: async (ctx) => {
318+
pageClosedStates.push(ctx.page.isClosed());
319+
},
320+
});
321+
322+
await browserCrawler.run();
323+
324+
expect(pageClosedStates).toHaveLength(1);
325+
expect(pageClosedStates[0]).toBe(false);
326+
} finally {
327+
await localStorageEmulator.destroy();
328+
}
329+
});
330+
294331
test.concurrent('should correctly track request.state', async () => {
295332
const localStorageEmulator = new MemoryStorageEmulator();
296333
await localStorageEmulator.init();

0 commit comments

Comments
 (0)