Skip to content

Commit 0fb3407

Browse files
Retry pagespeed_insights on transient PSI failures (#48)
The PSI API occasionally returns transient errors — HTTP 5xx bursts when Google's backend is congested, rate-limit 429s under load, and HTTP 200 responses where lighthouseResult.runtimeError is set because Lighthouse itself crashed mid-audit (common on slow or JS-heavy pages). Until now each of these would bubble straight up to the caller as a tool error, failing the whole URL even though a single retry would usually succeed.

Split the PSI fetch into a helper with a bounded retry loop (max 1 retry, 2s backoff). Retry triggers on:
- network/abort errors (fetch throws)
- HTTP 5xx or 429
- HTTP 200 but lighthouseResult.runtimeError present

Non-transient failures (4xx other than 429, malformed JSON) still fail immediately.

Worst-case added latency: one retry window (~2s + one 90s PSI call) when the first attempt also hits its timeout — still comfortable under the 120s budget callers allow.

Retries are logged to stdout so wrangler tail reveals flakiness patterns when they happen.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b106a77 commit 0fb3407

1 file changed

Lines changed: 94 additions & 21 deletions

File tree

api/tools/pagespeed-insights.ts

Lines changed: 94 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ import type { Env } from "../types/env.ts";
88
const PSI_ENDPOINT =
99
"https://www.googleapis.com/pagespeedonline/v5/runPagespeed";
1010
const PSI_TIMEOUT_MS = 90_000; // per-attempt fetch timeout in ms (handed to AbortSignal.timeout)
11+
// Retry once on transient PSI failures — 5xx, 429, network errors, or a
12+
// lighthouseResult.runtimeError (PSI returns 200 with a runtimeError when
13+
// Lighthouse itself crashed while auditing). Most of these succeed on
14+
// second attempt; a fixed short backoff is enough.
15+
const PSI_MAX_RETRIES = 1; // retries AFTER the first attempt, i.e. at most PSI_MAX_RETRIES + 1 requests
16+
const PSI_RETRY_BACKOFF_MS = 2_000; // fixed pause in ms before each retry (no exponential growth)
1117

1218
// Lighthouse audit IDs we care about surfacing from the Lab result.
1319
const LAB_METRIC_AUDITS = [
@@ -232,6 +238,92 @@ function mapLabAudit(audit: any): z.infer<typeof labAuditSchema> | undefined {
232238
};
233239
}
234240

241+
/**
 * Assemble the full PageSpeed Insights request URL for a single audit run.
 *
 * @param url - Page to audit; sent as the `url` query parameter.
 * @param strategy - Sent as the `strategy` query parameter.
 * @param categories - Each entry is appended as its own repeated `category`
 *   query parameter, in the order given.
 * @returns `PSI_ENDPOINT` followed by `?` and the encoded query string. The
 *   `key` parameter is included only when `process.env.PAGESPEED_API_KEY` is
 *   set to a non-empty value.
 */
function buildPsiUrl(
  url: string,
  strategy: string,
  categories: readonly string[],
): string {
  // Seeding from an object literal preserves insertion order: url, then strategy.
  const query = new URLSearchParams({ url, strategy });
  categories.forEach((category) => query.append("category", category));

  const key = process.env.PAGESPEED_API_KEY;
  if (key) {
    query.set("key", key);
  }

  return PSI_ENDPOINT + "?" + query.toString();
}
255+
256+
/**
 * Fetch a PSI result, retrying transient failures up to `PSI_MAX_RETRIES` times
 * with a fixed `PSI_RETRY_BACKOFF_MS` pause before each retry.
 *
 * Transient (retried while retry budget remains):
 *   - Network-phase failure: `fetch` rejects or reading the body fails
 *     (connection error, per-attempt `PSI_TIMEOUT_MS` abort)
 *   - HTTP 5xx or 429 from PSI
 *   - HTTP 2xx whose JSON has `lighthouseResult.runtimeError` set (PSI's way of
 *     signaling that Lighthouse crashed mid-audit)
 *
 * Non-transient (thrown immediately, never retried):
 *   - Any other non-2xx status (e.g. 400, 403)
 *   - A 2xx body that is not valid JSON
 *
 * If the final attempt still carries a `runtimeError`, the JSON is returned
 * as-is so the caller can inspect it.
 *
 * Worst-case wall time is roughly
 * `(PSI_MAX_RETRIES + 1) * PSI_TIMEOUT_MS + PSI_MAX_RETRIES * PSI_RETRY_BACKOFF_MS`,
 * because every attempt gets its own full timeout.
 * NOTE(review): confirm that total fits whatever budget callers enforce.
 *
 * @param requestUrl - Fully-built PSI request URL.
 * @returns The parsed PSI JSON response (loosely typed).
 * @throws Error on a non-2xx response (message includes status and up to 500
 *   chars of the body), on malformed JSON, or when the last attempt fails at
 *   the network level.
 */
async function fetchPsiWithRetry(
  requestUrl: string,
  // biome-ignore lint/suspicious/noExplicitAny: PSI response is loose JSON
): Promise<any> {
  let lastError: Error | null = null;

  for (let attempt = 0; attempt <= PSI_MAX_RETRIES; attempt++) {
    if (attempt > 0) {
      console.log(
        `[pagespeed_insights] retry ${attempt}/${PSI_MAX_RETRIES} after: ${lastError?.message ?? "unknown"}`,
      );
      await new Promise((r) => setTimeout(r, PSI_RETRY_BACKOFF_MS));
    }

    const canRetry = attempt < PSI_MAX_RETRIES;

    // Network phase only. Keeping status handling and JSON parsing OUT of this
    // try block is what stops non-transient errors from being swallowed by the
    // catch and retried.
    let resp: Response;
    let body: string;
    try {
      resp = await fetch(requestUrl, {
        headers: { accept: "application/json" },
        signal: AbortSignal.timeout(PSI_TIMEOUT_MS),
      });
      // Read the body here so a timeout/abort while streaming it still counts
      // as a transient network failure.
      body = await resp.text();
    } catch (err) {
      if (canRetry) {
        lastError = err instanceof Error ? err : new Error(String(err));
        continue;
      }
      throw err;
    }

    if (!resp.ok) {
      const transient = resp.status >= 500 || resp.status === 429;
      const err = new Error(
        `PageSpeed Insights API error (${resp.status}): ${body.slice(0, 500)}`,
      );
      if (transient && canRetry) {
        lastError = err;
        continue;
      }
      // 4xx other than 429, or a transient status with no budget left.
      throw err;
    }

    // Malformed JSON is not transient: let the SyntaxError propagate at once.
    // biome-ignore lint/suspicious/noExplicitAny: PSI JSON is loose
    const json: any = JSON.parse(body);

    const runtimeError = json?.lighthouseResult?.runtimeError;
    if (runtimeError && canRetry) {
      lastError = new Error(
        `PSI lighthouseResult.runtimeError: ${runtimeError.code ?? ""} ${runtimeError.message ?? ""}`.trim(),
      );
      continue;
    }

    return json;
  }

  // Unreachable in practice — loop either returns or throws — but satisfies TS.
  throw lastError ?? new Error("PSI fetch failed with no captured error");
}
326+
235327
// ── Tool Definition ────────────────────────────────────────
236328

237329
export const pagespeedInsightsTool = (_env: Env) =>
@@ -253,27 +345,8 @@ export const pagespeedInsightsTool = (_env: Env) =>
253345
const { url, strategy, categories } = context;
254346

255347
try {
256-
const params = new URLSearchParams();
257-
params.set("url", url);
258-
params.set("strategy", strategy);
259-
for (const cat of categories) params.append("category", cat);
260-
const apiKey = process.env.PAGESPEED_API_KEY;
261-
if (apiKey) params.set("key", apiKey);
262-
263-
const response = await fetch(`${PSI_ENDPOINT}?${params.toString()}`, {
264-
headers: { accept: "application/json" },
265-
signal: AbortSignal.timeout(PSI_TIMEOUT_MS),
266-
});
267-
268-
if (!response.ok) {
269-
const body = await response.text();
270-
throw new Error(
271-
`PageSpeed Insights API error (${response.status}): ${body.slice(0, 500)}`,
272-
);
273-
}
274-
275-
// biome-ignore lint/suspicious/noExplicitAny: PSI response is loose JSON
276-
const json: any = await response.json();
348+
const requestUrl = buildPsiUrl(url, strategy, categories);
349+
const json = await fetchPsiWithRetry(requestUrl);
277350
const lr = json.lighthouseResult ?? {};
278351
const audits = (lr.audits ?? {}) as Record<string, unknown>;
279352
const cats = (lr.categories ?? {}) as Record<string, unknown>;

0 commit comments

Comments
 (0)