@@ -3,7 +3,7 @@ import type { AddressInfo } from 'node:net';
33import os from 'node:os' ;
44
55import type { PlaywrightCrawlingContext , PlaywrightGotoOptions , Request } from '@crawlee/playwright' ;
6- import { PlaywrightCrawler , RequestList } from '@crawlee/playwright' ;
6+ import { PlaywrightCrawler , RequestList , SKIP_BLOCKED_STATUS_CODE_CHECK } from '@crawlee/playwright' ;
77import type { Cheerio , CheerioAPI , CheerioRoot , Element } from '@crawlee/utils' ;
88import express from 'express' ;
99import playwright from 'playwright' ;
@@ -33,6 +33,9 @@ describe('PlaywrightCrawler', () => {
3333 res . send ( `<html><head><title>Example Domain</title></head></html>` ) ;
3434 res . status ( 200 ) ;
3535 } ) ;
// Test-only route that always answers with HTTP 403 and a small "Blocked" HTML page.
// It is the target URL of the SKIP_BLOCKED_STATUS_CODE_CHECK tests in this file.
// The status is set before the body is sent, so the response is delivered as a 403.
36+ app . get ( '/blocked-403' , ( _req , res ) => {
37+ res . status ( 403 ) . send ( `<html><head><title>Blocked</title></head><body>nope</body></html>` ) ;
38+ } ) ;
3639 } ) ;
3740
3841 beforeAll ( async ( ) => {
@@ -194,6 +197,54 @@ describe('PlaywrightCrawler', () => {
194197 } ,
195198 ) ;
196199
// Covers how the SKIP_BLOCKED_STATUS_CODE_CHECK flag on the crawling context changes the outcome
// of a request to the local /blocked-403 route: with the flag set the request handler must run,
// without it the request must end up in failedRequestHandler instead.
// NOTE(review): the check that treats 403 as "blocked" lives inside the crawler and is not visible
// here — these tests only pin its observable effect.
200+ describe ( 'SKIP_BLOCKED_STATUS_CODE_CHECK' , ( ) => {
// Flag set: the 403 response is expected to reach the request handler.
201+ test ( 'reaches the request handler on a 403 when set in a postNavigationHook' , async ( ) => {
// No-op handlers wrapped in vi.fn so their calls (and arguments) can be asserted afterwards.
202+ const requestHandler = vi . fn ( async ( _ctx : PlaywrightCrawlingContext ) => { } ) ;
203+ const failedRequestHandler = vi . fn ( async ( _ctx : PlaywrightCrawlingContext ) => { } ) ;
204+
// Single-URL crawl against the local test server. The random suffix gives the RequestList a
// unique name — presumably to avoid sharing state with other tests; confirm.
// maxRequestRetries is 0 so a failure is not retried, which keeps the call-count assertions exact.
205+ const crawler = new PlaywrightCrawler ( {
206+ requestList : await RequestList . open ( `skip-flag-set-${ Math . random ( ) } ` , [
207+ `http://${ HOSTNAME } :${ port } /blocked-403` ,
208+ ] ) ,
209+ maxRequestRetries : 0 ,
210+ maxConcurrency : 1 ,
211+ postNavigationHooks : [
212+ async ( ctx ) => {
213+ ctx [ SKIP_BLOCKED_STATUS_CODE_CHECK ] = true ;
214+ } ,
215+ ] ,
216+ requestHandler,
217+ failedRequestHandler,
218+ } ) ;
219+
220+ await crawler . run ( ) ;
221+
// The handler ran exactly once, saw the 403 response itself, and nothing was reported as failed.
222+ expect ( requestHandler ) . toHaveBeenCalledOnce ( ) ;
223+ expect ( requestHandler . mock . calls [ 0 ] [ 0 ] . response ! . status ( ) ) . toBe ( 403 ) ;
224+ expect ( failedRequestHandler ) . not . toHaveBeenCalled ( ) ;
225+ } ) ;
226+
// Control case: same crawl without the hook, so the flag is never set on the context.
227+ test ( 'skips the request handler on a 403 when not set' , async ( ) => {
// Same spy setup as the first test.
228+ const requestHandler = vi . fn ( async ( _ctx : PlaywrightCrawlingContext ) => { } ) ;
229+ const failedRequestHandler = vi . fn ( async ( _ctx : PlaywrightCrawlingContext ) => { } ) ;
230+
// Identical configuration to the first test except that no postNavigationHooks are registered.
231+ const crawler = new PlaywrightCrawler ( {
232+ requestList : await RequestList . open ( `skip-flag-unset-${ Math . random ( ) } ` , [
233+ `http://${ HOSTNAME } :${ port } /blocked-403` ,
234+ ] ) ,
235+ maxRequestRetries : 0 ,
236+ maxConcurrency : 1 ,
237+ requestHandler,
238+ failedRequestHandler,
239+ } ) ;
240+
241+ await crawler . run ( ) ;
242+
// The request handler is never invoked; the request is reported once to failedRequestHandler.
243+ expect ( requestHandler ) . not . toHaveBeenCalled ( ) ;
244+ expect ( failedRequestHandler ) . toHaveBeenCalledOnce ( ) ;
245+ } ) ;
246+ } ) ;
247+
197248 test ( 'should have correct types in crawling context' , async ( ) => {
198249 const requestHandler = async ( crawlingContext : PlaywrightCrawlingContext ) => {
199250 // Checking that types are correct
0 commit comments