Skip to content

Commit f4677e7

Browse files
committed
feat: implement FirecrawlService.mapSite tests and sanitizeMapOptions function
1 parent 7f6defa commit f4677e7

4 files changed

Lines changed: 225 additions & 24 deletions

File tree

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
2+
import { FirecrawlService } from "./firecrawl";
3+
4+
// Exercises the request body FirecrawlService.mapSite hands to fetch, using a
// stubbed global fetch and a mocked console.warn.
describe("FirecrawlService.mapSite", () => {
  // Real globals, saved so afterEach can put them back.
  const realFetch = globalThis.fetch;
  const realWarn = console.warn;

  // Parsed JSON body of the most recent stubbed fetch call; null until one happens.
  let sentBody: Record<string, unknown> | null = null;
  let warnMock = mock(() => {});

  // Fetch stand-in: records the parsed request body, then answers with a
  // fixed 200 JSON payload that contains a single link.
  const recordingFetch = async (
    _input: string | URL | Request,
    init?: RequestInit
  ) => {
    const rawBody = String(init?.body ?? "{}");
    sentBody = JSON.parse(rawBody) as Record<string, unknown>;

    const payload = JSON.stringify({
      success: true,
      links: ["https://example.com/docs"],
    });
    return new Response(payload, {
      status: 200,
      headers: { "Content-Type": "application/json" },
    });
  };

  beforeEach(() => {
    sentBody = null;
    warnMock = mock(() => {});
    console.warn = warnMock as typeof console.warn;
    globalThis.fetch = recordingFetch as typeof fetch;
  });

  afterEach(() => {
    console.warn = realWarn;
    globalThis.fetch = realFetch;
  });

  it("drops unsupported map options like maxAge from request body", async () => {
    const service = new FirecrawlService("test-key");
    // maxAge is not accepted by the declared options type, hence the double cast.
    const legacyOptions = {
      limit: 100,
      ignoreCache: false,
      maxAge: 3_600_000,
    } as unknown as Parameters<FirecrawlService["mapSite"]>[1];

    const outcome = await service.mapSite("https://example.com", legacyOptions);

    expect(outcome).toEqual({
      success: true,
      urls: ["https://example.com/docs"],
    });
    expect(sentBody).toEqual({
      url: "https://example.com",
      includeSubdomains: false,
      limit: 100,
      ignoreCache: false,
    });
    expect(sentBody?.maxAge).toBeUndefined();
    expect(warnMock).toHaveBeenCalledTimes(1);
  });

  it("maps sitemapOnly and ignoreCache to supported Firecrawl /map fields", async () => {
    const service = new FirecrawlService("test-key");

    // Both sitemap flags are set; the expected body shows sitemapOnly winning.
    await service.mapSite("https://example.com", {
      search: "docs",
      includeSubdomains: true,
      limit: 42,
      sitemapOnly: true,
      ignoreSitemap: true,
      ignoreCache: true,
    });

    expect(sentBody).toEqual({
      url: "https://example.com",
      search: "docs",
      includeSubdomains: true,
      limit: 42,
      sitemap: "only",
      ignoreCache: true,
    });
    expect(warnMock).not.toHaveBeenCalled();
  });

  it("maps ignoreSitemap to sitemap=skip", async () => {
    const service = new FirecrawlService("test-key");

    await service.mapSite("https://example.com", { ignoreSitemap: true });

    // includeSubdomains and limit are expected at their defaults here.
    expect(sentBody).toEqual({
      url: "https://example.com",
      includeSubdomains: false,
      limit: 100,
      sitemap: "skip",
    });
    expect(warnMock).not.toHaveBeenCalled();
  });
});

apps/api/src/services/firecrawl.ts

Lines changed: 69 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,61 @@ export type MapOptions = {
256256
limit?: number;
257257
/** Firecrawl v2 map option for bypassing cache */
258258
ignoreCache?: boolean;
259-
/**
260-
* Deprecated compatibility field.
261-
* Firecrawl map endpoint does not support maxAge.
262-
* When set to 0 or less, it forces ignoreCache=true.
263-
*/
264-
maxAge?: number;
265259
};
266260

261+
/**
 * Names of the `MapOptions` fields that `sanitizeMapOptions` recognizes.
 * Any other key found on an options object is reported as unknown.
 */
const FIRECRAWL_MAP_OPTION_KEYS = new Set<keyof MapOptions>(
  [
    "search",
    "ignoreSitemap",
    "sitemapOnly",
    "includeSubdomains",
    "limit",
    "ignoreCache",
  ] as const
);
269+
270+
/**
 * Result shape of `sanitizeMapOptions`: the recognized map options with
 * defaults already applied to the two required fields.
 */
type SanitizedMapOptions = {
  /** Always set; `sanitizeMapOptions` falls back to `false`. */
  includeSubdomains: boolean;
  /** Always set; `sanitizeMapOptions` falls back to `100`. */
  limit: number;
  /** Present only when the caller supplied a non-empty search string. */
  search?: string;
  /** Present (as `true`) only when the caller enabled it. */
  ignoreSitemap?: boolean;
  /** Present (as `true`) only when the caller enabled it. */
  sitemapOnly?: boolean;
  /** Present whenever the caller supplied a value, including `false`. */
  ignoreCache?: boolean;
};
278+
279+
/**
 * Reduces caller-supplied map options to the recognized fields and reports
 * any extra keys so the caller can log them.
 *
 * @param options - Options as passed to `mapSite`; may carry extra keys at
 *   runtime (for example a legacy `maxAge`) despite the declared type.
 * @returns `sanitized` — recognized fields only, with `includeSubdomains`
 *   defaulting to `false` and `limit` to `100`; `unknownKeys` — names of keys
 *   that are not in `FIRECRAWL_MAP_OPTION_KEYS` and hold a defined value.
 */
function sanitizeMapOptions(options: MapOptions): {
  sanitized: SanitizedMapOptions;
  unknownKeys: string[];
} {
  // Only keys that actually carry a value count as "dropped": a key that is
  // present but undefined (e.g. `{ maxAge: undefined }` from forwarding an
  // optional field) holds nothing to drop and must not trigger a warning.
  const unknownKeys = Object.entries(options as Record<string, unknown>)
    .filter(
      ([key, value]) =>
        value !== undefined &&
        !FIRECRAWL_MAP_OPTION_KEYS.has(key as keyof MapOptions)
    )
    .map(([key]) => key);

  const sanitized: SanitizedMapOptions = {
    includeSubdomains: options.includeSubdomains ?? false,
    limit: options.limit ?? 100,
  };

  // An empty search string is treated the same as no search at all.
  if (options.search) {
    sanitized.search = options.search;
  }

  // The sitemap flags are only recorded when enabled; both may be set at once.
  if (options.ignoreSitemap) {
    sanitized.ignoreSitemap = true;
  }

  if (options.sitemapOnly) {
    sanitized.sitemapOnly = true;
  }

  // Preserve an explicit `false`, which differs from "not specified".
  if (options.ignoreCache !== undefined) {
    sanitized.ignoreCache = options.ignoreCache;
  }

  return {
    sanitized,
    unknownKeys,
  };
}
313+
267314
function sleep(ms: number): Promise<void> {
268315
return new Promise((resolve) => setTimeout(resolve, ms));
269316
}
@@ -1153,26 +1200,32 @@ export class FirecrawlService {
11531200
console.log("[firecrawl] mapSite called for:", url, "options:", options);
11541201

11551202
try {
1203+
const { sanitized: sanitizedOptions, unknownKeys } =
1204+
sanitizeMapOptions(options);
1205+
if (unknownKeys.length > 0) {
1206+
console.warn("[firecrawl] mapSite dropping unsupported options", {
1207+
unknownKeys,
1208+
});
1209+
}
1210+
11561211
const requestBody: Record<string, unknown> = {
11571212
url,
1158-
includeSubdomains: options.includeSubdomains ?? false,
1159-
limit: options.limit ?? 100,
1213+
includeSubdomains: sanitizedOptions.includeSubdomains,
1214+
limit: sanitizedOptions.limit,
11601215
};
11611216

1162-
if (options.search) {
1163-
requestBody.search = options.search;
1217+
if (sanitizedOptions.search) {
1218+
requestBody.search = sanitizedOptions.search;
11641219
}
11651220

1166-
if (options.sitemapOnly) {
1221+
if (sanitizedOptions.sitemapOnly) {
11671222
requestBody.sitemap = "only";
1168-
} else if (options.ignoreSitemap) {
1223+
} else if (sanitizedOptions.ignoreSitemap) {
11691224
requestBody.sitemap = "skip";
11701225
}
11711226

1172-
if (options.ignoreCache !== undefined) {
1173-
requestBody.ignoreCache = options.ignoreCache;
1174-
} else if (options.maxAge !== undefined && options.maxAge <= 0) {
1175-
requestBody.ignoreCache = true;
1227+
if (sanitizedOptions.ignoreCache !== undefined) {
1228+
requestBody.ignoreCache = sanitizedOptions.ignoreCache;
11761229
}
11771230

11781231
const response = await this.requestWithRetry("/map", {

apps/api/src/trpc/routers/ai-agent.test.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import { describe, expect, it } from "bun:test";
2-
import { getModelSelectionError } from "./ai-agent";
2+
import {
3+
getGenerateBasePromptFirecrawlOptions,
4+
getModelSelectionError,
5+
} from "./ai-agent";
36

47
describe("ai-agent router model validation", () => {
58
it("rejects unknown models", () => {
@@ -29,3 +32,20 @@ describe("ai-agent router model validation", () => {
2932
expect(result).toBeNull();
3033
});
3134
});
35+
36+
// Pins the option sets returned by getGenerateBasePromptFirecrawlOptions:
// maxAge belongs to the scrape options only, never to the map options.
describe("generateBasePrompt Firecrawl options", () => {
  it("uses scrape maxAge and map ignoreCache without maxAge", () => {
    const { scrapeOptions, mapOptions } =
      getGenerateBasePromptFirecrawlOptions();

    expect(scrapeOptions).toEqual({ maxAge: 3_600_000 });
    expect(mapOptions).toEqual({ limit: 100, ignoreCache: false });

    // Explicit key check in addition to the deep-equality assertion above.
    const mapOptionsRecord = mapOptions as Record<string, unknown>;
    const hasMaxAge = "maxAge" in mapOptionsRecord;
    expect(hasMaxAge).toBe(false);
  });
});

apps/api/src/trpc/routers/ai-agent.ts

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,25 @@ export function getModelSelectionError(params: {
427427
return null;
428428
}
429429

430+
/** Scrape `maxAge` used by generateBasePrompt: one hour, expressed in milliseconds. */
const GENERATE_BASE_PROMPT_FIRECRAWL_SCRAPE_MAX_AGE_MS = 60 * 60 * 1000;
431+
432+
/**
 * Builds the two Firecrawl option sets used by generateBasePrompt.
 *
 * @returns `scrapeOptions` (typed as the second parameter of
 *   `firecrawlService.extractBrandInfo`) carrying the scrape `maxAge`, and
 *   `mapOptions` (typed as the second parameter of `firecrawlService.mapSite`)
 *   carrying `limit` and `ignoreCache` but no `maxAge`.
 */
export function getGenerateBasePromptFirecrawlOptions(): {
  scrapeOptions: NonNullable<
    Parameters<typeof firecrawlService.extractBrandInfo>[1]
  >;
  mapOptions: Parameters<typeof firecrawlService.mapSite>[1];
} {
  const scrapeOptions: NonNullable<
    Parameters<typeof firecrawlService.extractBrandInfo>[1]
  > = {
    maxAge: GENERATE_BASE_PROMPT_FIRECRAWL_SCRAPE_MAX_AGE_MS,
  };

  const mapOptions: Parameters<typeof firecrawlService.mapSite>[1] = {
    limit: 100,
    ignoreCache: false,
  };

  return { scrapeOptions, mapOptions };
}
448+
430449
async function resolveAiAgentModelForRead(params: {
431450
db: Parameters<typeof updateAiAgentModel>[0];
432451
agent: Awaited<ReturnType<typeof getAiAgentForWebsite>>;
@@ -715,15 +734,18 @@ export const aiAgentRouter = createTRPCRouter({
715734
if (input.sourceUrl) {
716735
// Run brand extraction (which scrapes internally) and site mapping in parallel
717736
// extractBrandInfo returns company name, description, logo, favicon, AND markdown content
718-
// Use maxAge of 1 hour (in ms) to enable Firecrawl caching - avoids re-paying
737+
// Use scrape maxAge of 1 hour (in ms) to enable Firecrawl caching - avoids re-paying
719738
// for API calls when user refreshes the page during onboarding
720-
const cacheOptions = { maxAge: 3_600_000 };
739+
const firecrawlOptions = getGenerateBasePromptFirecrawlOptions();
721740
[brandInfo, mapResult] = await Promise.all([
722-
firecrawlService.extractBrandInfo(input.sourceUrl, cacheOptions),
723-
firecrawlService.mapSite(input.sourceUrl, {
724-
limit: 100,
725-
ignoreCache: false,
726-
}),
741+
firecrawlService.extractBrandInfo(
742+
input.sourceUrl,
743+
firecrawlOptions.scrapeOptions
744+
),
745+
firecrawlService.mapSite(
746+
input.sourceUrl,
747+
firecrawlOptions.mapOptions
748+
),
727749
]);
728750

729751
// Log what Firecrawl returned for debugging

0 commit comments

Comments
 (0)