Skip to content

Commit 3bb68c9

Browse files
committed
test(playwright): cover SKIP_BLOCKED_STATUS_CODE_CHECK opt-out
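Adds a test endpoint that responds with HTTP 403 and verifies two cases: when a post-navigation hook sets the SKIP_BLOCKED_STATUS_CODE_CHECK symbol on the crawling context, the 403 response reaches the request handler and the failed-request handler is not called; when the symbol is not set, the crawler falls back to the default blocked-status-code behavior, so the request handler is skipped and the failed-request handler is called once.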
1 parent f59b2cd commit 3bb68c9

1 file changed

Lines changed: 52 additions & 1 deletion

File tree

test/core/crawlers/playwright_crawler.test.ts

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import type { AddressInfo } from 'node:net';
33
import os from 'node:os';
44

55
import type { PlaywrightCrawlingContext, PlaywrightGotoOptions, Request } from '@crawlee/playwright';
6-
import { PlaywrightCrawler, RequestList } from '@crawlee/playwright';
6+
import { PlaywrightCrawler, RequestList, SKIP_BLOCKED_STATUS_CODE_CHECK } from '@crawlee/playwright';
77
import type { Cheerio, CheerioAPI, CheerioRoot, Element } from '@crawlee/utils';
88
import express from 'express';
99
import playwright from 'playwright';
@@ -33,6 +33,9 @@ describe('PlaywrightCrawler', () => {
3333
res.send(`<html><head><title>Example Domain</title></head></html>`);
3434
res.status(200);
3535
});
36+
app.get('/blocked-403', (_req, res) => {
37+
res.status(403).send(`<html><head><title>Blocked</title></head><body>nope</body></html>`);
38+
});
3639
});
3740

3841
beforeAll(async () => {
@@ -194,6 +197,54 @@ describe('PlaywrightCrawler', () => {
194197
},
195198
);
196199

200+
describe('SKIP_BLOCKED_STATUS_CODE_CHECK', () => {
201+
test('reaches the request handler on a 403 when set in a postNavigationHook', async () => {
202+
const requestHandler = vi.fn(async (_ctx: PlaywrightCrawlingContext) => {});
203+
const failedRequestHandler = vi.fn(async (_ctx: PlaywrightCrawlingContext) => {});
204+
205+
const crawler = new PlaywrightCrawler({
206+
requestList: await RequestList.open(`skip-flag-set-${Math.random()}`, [
207+
`http://${HOSTNAME}:${port}/blocked-403`,
208+
]),
209+
maxRequestRetries: 0,
210+
maxConcurrency: 1,
211+
postNavigationHooks: [
212+
async (ctx) => {
213+
ctx[SKIP_BLOCKED_STATUS_CODE_CHECK] = true;
214+
},
215+
],
216+
requestHandler,
217+
failedRequestHandler,
218+
});
219+
220+
await crawler.run();
221+
222+
expect(requestHandler).toHaveBeenCalledOnce();
223+
expect(requestHandler.mock.calls[0][0].response!.status()).toBe(403);
224+
expect(failedRequestHandler).not.toHaveBeenCalled();
225+
});
226+
227+
test('skips the request handler on a 403 when not set', async () => {
228+
const requestHandler = vi.fn(async (_ctx: PlaywrightCrawlingContext) => {});
229+
const failedRequestHandler = vi.fn(async (_ctx: PlaywrightCrawlingContext) => {});
230+
231+
const crawler = new PlaywrightCrawler({
232+
requestList: await RequestList.open(`skip-flag-unset-${Math.random()}`, [
233+
`http://${HOSTNAME}:${port}/blocked-403`,
234+
]),
235+
maxRequestRetries: 0,
236+
maxConcurrency: 1,
237+
requestHandler,
238+
failedRequestHandler,
239+
});
240+
241+
await crawler.run();
242+
243+
expect(requestHandler).not.toHaveBeenCalled();
244+
expect(failedRequestHandler).toHaveBeenCalledOnce();
245+
});
246+
});
247+
197248
test('should have correct types in crawling context', async () => {
198249
const requestHandler = async (crawlingContext: PlaywrightCrawlingContext) => {
199250
// Checking that types are correct

0 commit comments

Comments
 (0)