|
| 1 | +#!/usr/bin/env bun |
| 2 | +/** |
| 3 | + * Unit tests for Cloudflare AI timeout fallback behavior |
| 4 | + * |
| 5 | + * Covers: |
| 6 | + * 1. Non-streaming timeout fallback: primary throws "3046" error -> fallback_model in response |
| 7 | + * 2. Streaming timeout fallback: primary throws "Request timed out" error -> X-Fallback-Model header |
| 8 | + * 3. Normal path (no fallback): primary succeeds -> no fallback metadata |
| 9 | + * 4. Non-retryable error: primary throws non-timeout error -> error returned immediately, no retry |
| 10 | + * |
| 11 | + * Approach: direct handler unit tests using a minimal Hono app with mocked c.env.AI. |
| 12 | + * No x402 payment flow is needed — the x402 middleware is not mounted in these tests. |
| 13 | + */ |
| 14 | + |
| 15 | +import { describe, expect, test } from "bun:test"; |
| 16 | +import { Hono } from "hono"; |
| 17 | +import { CloudflareChat } from "../src/endpoints/inference/cloudflare/chat"; |
| 18 | +import type { Env, AppVariables, Logger } from "../src/types"; |
| 19 | + |
| 20 | +// --------------------------------------------------------------------------- |
| 21 | +// Constants mirrored from the source (not re-exported, but stable) |
| 22 | +// --------------------------------------------------------------------------- |
| 23 | + |
/**
 * Model id the tests send as the primary model and that the mocks treat as
 * "the model that times out". Copied by hand from the handler source.
 */
const DEFAULT_CF_MODEL = "@cf/meta/llama-3.1-8b-instruct";

/**
 * Model id the tests expect to see reported as the fallback
 * (`fallback_model` field / `X-Fallback-Model` header). Copied by hand from
 * the handler source.
 */
const FALLBACK_CF_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast";
| 26 | + |
| 27 | +// --------------------------------------------------------------------------- |
| 28 | +// Test helpers |
| 29 | +// --------------------------------------------------------------------------- |
| 30 | + |
/**
 * Create a silent logger for tests.
 *
 * All four level methods do nothing, and `child` hands back the logger it was
 * called on, so any chain of `child()` calls still ends at the same object.
 *
 * @returns An object with `debug`, `info`, `warn`, `error` and `child`.
 */
function makeLogger(): Logger {
  const noop = () => {};
  return {
    debug: noop,
    info: noop,
    warn: noop,
    error: noop,
    // Method shorthand keeps `this` bound to the logger the call was made on.
    child() {
      return this;
    },
  };
}
| 43 | + |
/**
 * Assemble a throwaway Hono app that exercises CloudflareChat in isolation.
 *
 * A middleware registered on the chat path runs before the handler and:
 *  - replaces `c.env` with a copy whose `AI` binding is the supplied mock,
 *  - sets the `requestId`, `logger` and `x402` context variables.
 *
 * No x402 payment middleware is mounted here; only a stub `x402` value is set
 * so the handler has one available on the context.
 *
 * @param mockAI - Stand-in for the AI binding; cast to `Ai` before injection.
 * @returns The Hono app with POST /inference/cloudflare/chat routed to
 *          `CloudflareChat#handle`.
 */
function buildTestApp(mockAI: Partial<Ai>) {
  const chatRoute = "/inference/cloudflare/chat";
  const stubPayer = "SP1TESTPAYERADDRESS";

  // CloudflareChat extends OpenAPIRoute, so handle() is invoked directly.
  const chat = new CloudflareChat();
  const app = new Hono<{ Bindings: Env; Variables: AppVariables }>();

  // Runs ahead of the route handler to populate env and context variables.
  app.use(chatRoute, async (c, next) => {
    // @ts-expect-error — we are intentionally overriding readonly env in tests
    c.env = { ...c.env, AI: mockAI as Ai };

    c.set("requestId", "test-req-id");
    c.set("logger", makeLogger());

    // Stub payment context: zero-cost estimate and an already-settled result.
    c.set("x402", {
      payerAddress: stubPayer,
      settleResult: {
        success: true,
        transaction: "",
        network: "mainnet:1",
        payer: stubPayer,
      },
      priceEstimate: {
        estimatedCostUsd: 0,
        costWithMarginUsd: 0,
        amountInToken: BigInt(0),
        tokenType: "STX" as const,
        tier: "standard" as const,
      },
    });

    await next();
  });

  app.post(chatRoute, (c) => chat.handle(c));

  return app;
}
| 88 | + |
/**
 * Default request payload for the chat endpoint: one user message addressed
 * to the primary model, with no `stream` flag set.
 */
const CHAT_BODY = {
  model: DEFAULT_CF_MODEL,
  messages: [
    {
      role: "user",
      content: "Hello",
    },
  ],
};
| 94 | + |
/**
 * Send a JSON POST to the chat route of a test app.
 *
 * @param app - App produced by `buildTestApp`.
 * @param body - Payload to serialise as the request body; defaults to `CHAT_BODY`.
 * @returns Whatever `app.request` yields for the call.
 */
async function postChat(
  app: ReturnType<typeof buildTestApp>,
  body: Record<string, unknown> = CHAT_BODY
) {
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  };
  return app.request("/inference/cloudflare/chat", init);
}
| 106 | + |
| 107 | +// --------------------------------------------------------------------------- |
| 108 | +// Tests |
| 109 | +// --------------------------------------------------------------------------- |
| 110 | + |
/**
 * Suite for the chat handler's behaviour when the primary model fails.
 *
 * Each case builds a mock AI binding whose `run` either throws or resolves,
 * posts one request through a fresh test app, and checks the status, the JSON
 * body or headers, and how many times `run` was invoked.
 */
describe("CloudflareChat timeout fallback behavior", () => {
  /** Parse a response body as a loosely typed JSON object. */
  const readJson = async (res: Response) =>
    (await res.json()) as Record<string, unknown>;

  test("non-streaming: timeout on primary model triggers fallback, response includes fallback_model", async () => {
    let invocations = 0;

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        invocations += 1;
        if (model !== DEFAULT_CF_MODEL) {
          // Any model other than the primary answers normally.
          return { response: "Hello from fallback" };
        }
        // Primary model: mimic Cloudflare error code 3046.
        throw new Error("Inference request failed: error code 3046");
      },
    };

    const res = await postChat(buildTestApp(mockAI));
    expect(res.status).toBe(200);

    const payload = await readJson(res);
    expect(payload.ok).toBe(true);
    expect(payload.fallback_model).toBe(FALLBACK_CF_MODEL);
    expect(payload.response).toBe("Hello from fallback");
    // `model` still reports the model that was originally requested.
    expect(payload.model).toBe(DEFAULT_CF_MODEL);
    // One failed primary call plus one successful fallback call.
    expect(invocations).toBe(2);
  });

  test("streaming: timeout on primary model triggers fallback, response includes X-Fallback-Model header", async () => {
    let invocations = 0;

    // Single-chunk stream standing in for a streamed model reply.
    const fakeStream = new ReadableStream({
      start(controller) {
        const chunk = new TextEncoder().encode('data: {"response":"hi"}\n\n');
        controller.enqueue(chunk);
        controller.close();
      },
    });

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        invocations += 1;
        if (model !== DEFAULT_CF_MODEL) {
          // Fallback call resolves with the stream.
          return fakeStream;
        }
        throw new Error("Request timed out");
      },
    };

    const res = await postChat(buildTestApp(mockAI), {
      ...CHAT_BODY,
      stream: true,
    });

    expect(res.status).toBe(200);
    expect(res.headers.get("Content-Type")).toContain("text/event-stream");
    expect(res.headers.get("X-Fallback-Model")).toBe(FALLBACK_CF_MODEL);
    expect(invocations).toBe(2);
  });

  test("normal path: primary model succeeds, no fallback metadata in response", async () => {
    let invocations = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        invocations += 1;
        return { response: "Hello from primary" };
      },
    };

    const res = await postChat(buildTestApp(mockAI));
    expect(res.status).toBe(200);

    const payload = await readJson(res);
    expect(payload.ok).toBe(true);
    expect(payload.fallback_model).toBeUndefined();
    // The fallback header must be absent as well.
    expect(res.headers.get("X-Fallback-Model")).toBeNull();
    expect(payload.response).toBe("Hello from primary");
    // A single call means nothing was retried.
    expect(invocations).toBe(1);
  });

  test("non-streaming normal path: no X-Fallback-Model header", async () => {
    const mockAI = {
      run: async (_model: string, _opts: unknown) => ({ response: "Success" }),
    };

    const res = await postChat(buildTestApp(mockAI));

    expect(res.status).toBe(200);
    expect(res.headers.get("X-Fallback-Model")).toBeNull();
  });

  test("non-retryable error: primary throws non-timeout error, handler returns error immediately without retry", async () => {
    let invocations = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        invocations += 1;
        // Not a timeout: the test expects this to surface as MODEL_NOT_FOUND.
        throw new Error("Model not found");
      },
    };

    const res = await postChat(buildTestApp(mockAI));

    // Expected mapping: 404 with a non-retryable MODEL_NOT_FOUND code.
    expect(res.status).toBe(404);
    const payload = await readJson(res);
    expect(payload.ok).toBe(false);
    expect(payload.error_code).toBe("MODEL_NOT_FOUND");
    expect(payload.retryable).toBe(false);
    // No fallback attempt for a non-timeout failure.
    expect(invocations).toBe(1);
  });

  test("non-retryable error (internal): primary throws generic error, returns 502 without retry", async () => {
    let invocations = 0;

    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        invocations += 1;
        throw new Error("Unexpected internal error from Cloudflare AI");
      },
    };

    const res = await postChat(buildTestApp(mockAI));

    expect(res.status).toBe(502);
    const payload = await readJson(res);
    expect(payload.ok).toBe(false);
    expect(payload.error_code).toBe("INTERNAL_ERROR");
    expect(payload.retryable).toBe(false);
    // Internal errors are not retried either.
    expect(invocations).toBe(1);
  });

  test("AbortError name triggers timeout fallback", async () => {
    let invocations = 0;

    const mockAI = {
      run: async (model: string, _opts: unknown) => {
        invocations += 1;
        if (model !== DEFAULT_CF_MODEL) {
          return { response: "Fallback response" };
        }
        // Only the error's name marks it as an abort; the message does not.
        throw Object.assign(new Error("Aborted"), { name: "AbortError" });
      },
    };

    const res = await postChat(buildTestApp(mockAI));

    expect(res.status).toBe(200);
    const payload = await readJson(res);
    expect(payload.fallback_model).toBe(FALLBACK_CF_MODEL);
    expect(invocations).toBe(2);
  });

  test("error response does not include fallback_model field", async () => {
    const mockAI = {
      run: async (_model: string, _opts: unknown) => {
        throw new Error("Rate limit exceeded");
      },
    };

    const res = await postChat(buildTestApp(mockAI));

    expect(res.status).toBe(429);
    const payload = await readJson(res);
    expect(payload.ok).toBe(false);
    expect(payload.fallback_model).toBeUndefined();
  });
});
0 commit comments