Skip to content

Commit 5b7d8d9

Browse files
tfireubs-ui and claude authored
test(cloudflare-ai): add unit tests for timeout fallback behavior (closes #70) (#75)
Adds tests/cloudflare-ai-fallback.unit.test.ts with 8 unit tests covering the Cloudflare AI timeout fallback logic introduced in #69:
- Non-streaming path: "3046" error on primary → fallback_model in JSON response
- Streaming path: "Request timed out" → X-Fallback-Model header set
- Normal path: primary succeeds → no fallback metadata in response or headers
- Non-retryable errors (MODEL_NOT_FOUND, INTERNAL_ERROR, RATE_LIMIT) → no retry
- AbortError name triggers timeout classification and fallback

Uses a minimal Hono app with a mocked c.env.AI binding; no x402 payment middleware is needed because the payment context is stubbed directly in the test.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 820f9b6 commit 5b7d8d9

1 file changed

Lines changed: 301 additions & 0 deletions

File tree

Lines changed: 301 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,301 @@
1+
#!/usr/bin/env bun
2+
/**
3+
* Unit tests for Cloudflare AI timeout fallback behavior
4+
*
5+
* Covers:
6+
* 1. Non-streaming timeout fallback: primary throws "3046" error -> fallback_model in response
7+
* 2. Streaming timeout fallback: primary throws "Request timed out" error -> X-Fallback-Model header
8+
* 3. Normal path (no fallback): primary succeeds -> no fallback metadata
9+
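* 4. Non-retryable error: primary throws non-timeout error -> error returned immediately, no retry
* 5. AbortError: primary throws an error named "AbortError" -> treated as a timeout, fallback_model in response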
10+
*
11+
* Approach: direct handler unit tests using a minimal Hono app with mocked c.env.AI.
12+
* No x402 payment flow is needed — the x402 middleware is not mounted in these tests.
13+
*/
14+
15+
import { describe, expect, test } from "bun:test";
16+
import { Hono } from "hono";
17+
import { CloudflareChat } from "../src/endpoints/inference/cloudflare/chat";
18+
import type { Env, AppVariables, Logger } from "../src/types";
19+
20+
// ---------------------------------------------------------------------------
21+
// Constants mirrored from the source (not re-exported, but stable)
22+
// ---------------------------------------------------------------------------
23+
24+
/** Primary model id that the tests send in the request body. */
const DEFAULT_CF_MODEL = "@cf/meta/llama-3.1-8b-instruct";
/** Model id the tests expect in `fallback_model` / `X-Fallback-Model` after a timeout. */
const FALLBACK_CF_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast";
26+
27+
// ---------------------------------------------------------------------------
28+
// Test helpers
29+
// ---------------------------------------------------------------------------
30+
31+
/**
 * Create a silent logger for tests.
 *
 * Every log method is a no-op. `child()` returns the same logger object, and
 * does so through a closure rather than `this`, so it still works when the
 * method is detached from the object before being called.
 *
 * @returns A logger whose methods do nothing and whose children are itself.
 */
function makeLogger(): Logger {
  const noop = () => {};
  const logger: Logger = {
    debug: noop,
    info: noop,
    warn: noop,
    error: noop,
    child: () => logger,
  };
  return logger;
}
43+
44+
/**
 * Build a minimal Hono app that exercises the CloudflareChat handler in isolation.
 *
 * A middleware registered on the chat route runs before the handler and:
 *   - replaces c.env with a copy whose `AI` binding is `mockAI`
 *   - sets the `requestId`, `logger` and `x402` context variables
 *
 * No payment middleware is mounted; the stub `x402` value stands in for it.
 *
 * @param mockAI Stand-in for the AI binding; the tests supply only `run`.
 * @returns The configured app, with the handler mounted at POST /inference/cloudflare/chat.
 */
function buildTestApp(mockAI: Partial<Ai>) {
  // Single source for the path shared by the middleware and the route.
  const chatRoute = "/inference/cloudflare/chat";
  const app = new Hono<{ Bindings: Env; Variables: AppVariables }>();

  // Middleware: inject env + vars before the route handler runs
  app.use(chatRoute, async (c, next) => {
    // Inject the AI binding into env
    // @ts-expect-error — we are intentionally overriding readonly env in tests
    c.env = {
      ...c.env,
      AI: mockAI as Ai,
    };

    // Set required context variables
    c.set("requestId", "test-req-id");
    c.set("logger", makeLogger());
    // Stub x402 context so the handler can read payment data without the real middleware
    c.set("x402", {
      payerAddress: "SP1TESTPAYERADDRESS",
      settleResult: { success: true, transaction: "", network: "mainnet:1", payer: "SP1TESTPAYERADDRESS" },
      priceEstimate: {
        estimatedCostUsd: 0,
        costWithMarginUsd: 0,
        amountInToken: BigInt(0),
        tokenType: "STX" as const,
        tier: "standard" as const,
      },
    });

    return next();
  });

  // Mount the handler by calling handle() directly on a CloudflareChat instance
  const handler = new CloudflareChat();
  app.post(chatRoute, (c) => handler.handle(c));

  return app;
}
88+
89+
/**
 * Default chat request body: a single user message addressed to the primary
 * model, with no `stream` flag set.
 */
const CHAT_BODY = {
  model: DEFAULT_CF_MODEL,
  messages: [{ role: "user", content: "Hello" }],
};
94+
95+
/**
 * POST a JSON body to the chat route of a test app.
 *
 * @param app  App produced by buildTestApp.
 * @param body Request payload, serialized with JSON.stringify; defaults to CHAT_BODY.
 * @returns Whatever `app.request(...)` returns for this call.
 */
async function postChat(
  app: ReturnType<typeof buildTestApp>,
  body: Record<string, unknown> = CHAT_BODY
) {
  return app.request("/inference/cloudflare/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
}
106+
107+
// ---------------------------------------------------------------------------
108+
// Tests
109+
// ---------------------------------------------------------------------------
110+
111+
/**
 * Timeout-fallback behaviour of the CloudflareChat handler.
 *
 * Each test supplies a mock AI binding whose `run` either throws or returns a
 * canned result, posts a chat request through the test app, and then checks
 * the HTTP status, the response body/headers and how many times `run` was
 * called (1 = no fallback attempted, 2 = primary failed and fallback was tried).
 */
describe("CloudflareChat timeout fallback behavior", () => {
  test("non-streaming: timeout on primary model triggers fallback, response includes fallback_model", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        callCount++;
        if (model === DEFAULT_CF_MODEL) {
          // Simulate Cloudflare error code 3046
          throw new Error("Inference request failed: error code 3046");
        }
        // Fallback model succeeds
        return { response: "Hello from fallback" };
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(200);
    const data = (await res.json()) as Record<string, unknown>;

    expect(data.ok).toBe(true);
    expect(data.fallback_model).toBe(FALLBACK_CF_MODEL);
    expect(data.response).toBe("Hello from fallback");
    // The original requested model is preserved in the `model` field
    expect(data.model).toBe(DEFAULT_CF_MODEL);
    // Two AI calls: primary (timeout) + fallback (success)
    expect(callCount).toBe(2);
  });

  test("streaming: timeout on primary model triggers fallback, response includes X-Fallback-Model header", async () => {
    let callCount = 0;

    // Minimal ReadableStream stub for streaming responses
    const fakeStream = new ReadableStream({
      start(controller) {
        controller.enqueue(new TextEncoder().encode('data: {"response":"hi"}\n\n'));
        controller.close();
      },
    });

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        callCount++;
        if (model === DEFAULT_CF_MODEL) {
          throw new Error("Request timed out");
        }
        // Fallback succeeds and returns a stream
        return fakeStream;
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app, {
      model: DEFAULT_CF_MODEL,
      messages: [{ role: "user", content: "Hello" }],
      stream: true,
    });

    expect(res.status).toBe(200);
    expect(res.headers.get("Content-Type")).toContain("text/event-stream");
    expect(res.headers.get("X-Fallback-Model")).toBe(FALLBACK_CF_MODEL);
    expect(callCount).toBe(2);
  });

  test("normal path: primary model succeeds, no fallback metadata in response", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        callCount++;
        return { response: "Hello from primary" };
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(200);
    const data = (await res.json()) as Record<string, unknown>;

    expect(data.ok).toBe(true);
    expect(data.fallback_model).toBeUndefined();
    // Response headers should not include X-Fallback-Model
    expect(res.headers.get("X-Fallback-Model")).toBeNull();
    expect(data.response).toBe("Hello from primary");
    // Only one AI call (no retry)
    expect(callCount).toBe(1);
  });

  test("non-streaming normal path: no X-Fallback-Model header", async () => {
    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        return { response: "Success" };
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(200);
    expect(res.headers.get("X-Fallback-Model")).toBeNull();
  });

  test("non-retryable error: primary throws non-timeout error, handler returns error immediately without retry", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        callCount++;
        // Non-timeout error (Model not found, which maps to 404 / non-retryable)
        throw new Error("Model not found");
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    // Should return 404 (MODEL_NOT_FOUND is non-retryable)
    expect(res.status).toBe(404);
    const data = (await res.json()) as Record<string, unknown>;
    expect(data.ok).toBe(false);
    expect(data.error_code).toBe("MODEL_NOT_FOUND");
    expect(data.retryable).toBe(false);
    // Only one call — no fallback retry for non-timeout errors
    expect(callCount).toBe(1);
  });

  test("non-retryable error (internal): primary throws generic error, returns 502 without retry", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        callCount++;
        throw new Error("Unexpected internal error from Cloudflare AI");
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(502);
    const data = (await res.json()) as Record<string, unknown>;
    expect(data.ok).toBe(false);
    expect(data.error_code).toBe("INTERNAL_ERROR");
    expect(data.retryable).toBe(false);
    // Only one call — no retry for internal errors
    expect(callCount).toBe(1);
  });

  test("AbortError name triggers timeout fallback", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        callCount++;
        if (model === DEFAULT_CF_MODEL) {
          // Only the error's name marks it as an abort; the message is generic
          const err = new Error("Aborted");
          err.name = "AbortError";
          throw err;
        }
        return { response: "Fallback response" };
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(200);
    const data = (await res.json()) as Record<string, unknown>;
    expect(data.fallback_model).toBe(FALLBACK_CF_MODEL);
    expect(callCount).toBe(2);
  });

  test("error response does not include fallback_model field", async () => {
    let callCount = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        callCount++;
        throw new Error("Rate limit exceeded");
      },
    };

    const app = buildTestApp(mockAI);
    const res = await postChat(app);

    expect(res.status).toBe(429);
    const data = (await res.json()) as Record<string, unknown>;
    expect(data.ok).toBe(false);
    expect(data.fallback_model).toBeUndefined();
    // A rate-limit error is not a timeout, so the fallback model must not be tried
    expect(callCount).toBe(1);
  });
});

0 commit comments

Comments
 (0)