Skip to content

Commit 374f347

Browse files
Copilotjanbuchar
authored andcommitted
fix: treat CookieJar write methods as potentially throwing (#3426)
1 parent ba3a356 commit 374f347

5 files changed

Lines changed: 71 additions & 7 deletions

File tree

packages/core/src/session_pool/session.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,11 @@ export class Session implements ISession {
364364
* Sets a cookie within this session for the specific URL.
365365
*/
366366
setCookie(rawCookie: string, url: string): void {
367-
this.cookieJar.setCookieSync(rawCookie, url);
367+
try {
368+
this.cookieJar.setCookieSync(rawCookie, url);
369+
} catch (e) {
370+
this.log.warning('Could not set cookie.', { url, error: (e as Error).message });
371+
}
368372
}
369373

370374
/**
@@ -384,7 +388,7 @@ export class Session implements ISession {
384388

385389
// if invalid cookies are provided just log the exception. No need to retry the request automatically.
386390
if (errorMessages.length) {
387-
this.log.debug('Could not set cookies.', { errorMessages });
391+
this.log.warning('Could not set cookies.', { errorMessages });
388392
}
389393
}
390394

packages/http-client/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
"access": "public"
4848
},
4949
"dependencies": {
50+
"@apify/log": "^2.5.32",
5051
"@crawlee/types": "4.0.0",
5152
"tough-cookie": "^6.0.0"
5253
}

packages/http-client/src/base-http-client.ts

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import type { BaseHttpClient as BaseHttpClientInterface, SendRequestOptions } from '@crawlee/types';
22
import { CookieJar } from 'tough-cookie';
33

4+
import type { Log } from '@apify/log';
5+
import defaultLog from '@apify/log';
6+
47
export interface CustomFetchOptions {
58
proxyUrl?: string;
69
}
@@ -11,25 +14,43 @@ export interface CustomFetchOptions {
1114
* implement only the low-level network call in `fetch`.
1215
*/
1316
export abstract class BaseHttpClient implements BaseHttpClientInterface {
17+
protected log: Log;
18+
19+
constructor(log?: Log) {
20+
this.log = log ?? defaultLog;
21+
}
22+
1423
/**
1524
* Perform the raw network request and return a single Response without any
1625
* automatic redirect following or special error handling.
1726
*/
1827
protected abstract fetch(input: Request, init?: RequestInit & CustomFetchOptions): Promise<Response>;
1928

2029
private async applyCookies(request: Request, cookieJar: CookieJar): Promise<Request> {
21-
const cookies = (await cookieJar.getCookies(request.url)).map((x) => x.cookieString().trim()).filter(Boolean);
30+
try {
31+
const cookies = (await cookieJar.getCookies(request.url))
32+
.map((x) => x.cookieString().trim())
33+
.filter(Boolean);
2234

23-
if (cookies?.length > 0) {
24-
request.headers.set('cookie', cookies.join('; '));
35+
if (cookies?.length > 0) {
36+
request.headers.set('cookie', cookies.join('; '));
37+
}
38+
} catch (e) {
39+
this.log.warning(`Failed to get cookies for URL "${request.url}": ${(e as Error).message}`);
2540
}
2641
return request;
2742
}
2843

2944
private async setCookies(response: Response, cookieJar: CookieJar): Promise<void> {
3045
const setCookieHeaders = response.headers.getSetCookie();
3146

32-
await Promise.all(setCookieHeaders.map((header) => cookieJar.setCookie(header, response.url)));
47+
for (const header of setCookieHeaders) {
48+
try {
49+
await cookieJar.setCookie(header, response.url);
50+
} catch (e) {
51+
this.log.warning(`Failed to set cookie for URL "${response.url}": ${(e as Error).message}`);
52+
}
53+
}
3354
}
3455

3556
private resolveRequestContext(options?: SendRequestOptions): {

test/core/session_pool/session.test.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { EVENT_SESSION_RETIRED, Session, SessionPool } from '@crawlee/core';
1+
import { EVENT_SESSION_RETIRED, log, Session, SessionPool } from '@crawlee/core';
22
import { ResponseWithUrl } from '@crawlee/http-client';
33
import { entries, sleep } from '@crawlee/utils';
44
import { CookieJar } from 'tough-cookie';
@@ -207,6 +207,26 @@ describe('Session - testing session behaviour ', () => {
207207
expect(session.getCookieString(url)).toBe('cookie2=your-cookie');
208208
});
209209

210+
test('setCookies will log warning (not throw) on invalid cookies', () => {
211+
const url = 'https://www.example.com';
212+
// domain 'abc.different.domain' does not match the request URL, so tough-cookie rejects it
213+
const cookies = [{ name: 'cookie1', value: 'my-cookie', domain: 'abc.different.domain' }];
214+
215+
const mockedLog = vitest.mockObject(log, {
216+
spy: true,
217+
});
218+
219+
session = new Session({ sessionPool, log: mockedLog } as any);
220+
session.setCookies(cookies, url);
221+
expect(session.getCookieString(url)).toBe('');
222+
expect(mockedLog.warning).toHaveBeenCalledOnce();
223+
});
224+
225+
test('setCookie does not throw on malformed raw cookie string', () => {
226+
session = new Session({ sessionPool });
227+
expect(() => session.setCookie('garbled!!!@#$%nonsense', 'https://www.example.com')).not.toThrow();
228+
});
229+
210230
test('setCookies works with hostOnly cookies', () => {
211231
const url = 'https://www.example.com';
212232
const cookies = [

yarn.lock

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,13 @@ __metadata:
232232
languageName: node
233233
linkType: hard
234234

235+
"@apify/consts@npm:^2.51.0":
236+
version: 2.51.0
237+
resolution: "@apify/consts@npm:2.51.0"
238+
checksum: 10c0/06902272f1ca99ed515d45aca546ba41130dc8719413e1dd3ceed8c5dd7eeea30a9215227ac393403646b7fbef04bd26ef4f8215c5f04e5d42fa71834304071b
239+
languageName: node
240+
linkType: hard
241+
235242
"@apify/datastructures@npm:^2.0.0, @apify/datastructures@npm:^2.0.3":
236243
version: 2.0.3
237244
resolution: "@apify/datastructures@npm:2.0.3"
@@ -292,6 +299,16 @@ __metadata:
292299
languageName: node
293300
linkType: hard
294301

302+
"@apify/log@npm:^2.5.32":
303+
version: 2.5.32
304+
resolution: "@apify/log@npm:2.5.32"
305+
dependencies:
306+
"@apify/consts": "npm:^2.51.0"
307+
ansi-colors: "npm:^4.1.1"
308+
checksum: 10c0/dbe716561ee9df72a5f1b26881837780f3d7d8afb11676fd20f4e529dfce7997a4cc6e240c5902c27efa2b73beb66a8563d72a911dc4b6e641c92e8507dfdefb
309+
languageName: node
310+
linkType: hard
311+
295312
"@apify/ps-tree@npm:^1.2.0":
296313
version: 1.2.0
297314
resolution: "@apify/ps-tree@npm:1.2.0"
@@ -1096,6 +1113,7 @@ __metadata:
10961113
version: 0.0.0-use.local
10971114
resolution: "@crawlee/http-client@workspace:packages/http-client"
10981115
dependencies:
1116+
"@apify/log": "npm:^2.5.32"
10991117
"@crawlee/types": "npm:4.0.0"
11001118
tough-cookie: "npm:^6.0.0"
11011119
languageName: unknown

0 commit comments

Comments
 (0)