diff --git a/CHANGELOG.md b/CHANGELOG.md index 65aa5b7..7397b00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # @copilotkit/aimock +## 1.12.0 + +### Minor Changes + +- Multimedia endpoint support: image generation (OpenAI DALL-E + Gemini Imagen), text-to-speech, audio transcription, and video generation with async polling (#101) +- `match.endpoint` field for fixture isolation — prevents cross-matching between chat, image, speech, transcription, video, and embedding fixtures (#101) +- Bidirectional endpoint filtering — generic fixtures only match compatible endpoint types (#101) +- Convenience methods: `onImage`, `onSpeech`, `onTranscription`, `onVideo` (#101) +- Record & replay for all multimedia endpoints — proxy to real APIs, save fixtures with correct format/type detection (#101) +- `_endpointType` explicit field on `ChatCompletionRequest` for type safety (#101) +- Comparison matrix and drift detection rules updated for multimedia (#101) +- 54 new tests (32 integration, 11 record/replay, 12 type/routing) + ## 1.11.0 ### Minor Changes diff --git a/README.md b/README.md index 3a759b5..0e6ed07 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ https://github.com/user-attachments/assets/646bf106-0320-41f2-a9b1-5090454830f3 -Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies. +Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies. 
## Quick Start @@ -43,6 +43,7 @@ Run them all on one port with `npx aimock --config aimock.json`, or use the prog - **[Record & Replay](https://aimock.copilotkit.dev/record-replay)** — Proxy real APIs, save as fixtures, replay deterministically forever - **[11 LLM Providers](https://aimock.copilotkit.dev/docs)** — OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere — full streaming support +- **[Multimedia APIs](https://aimock.copilotkit.dev/images)** — Image generation (DALL-E, Imagen), text-to-speech, audio transcription, video generation - **[MCP / A2A / AG-UI / Vector](https://aimock.copilotkit.dev/mcp-mock)** — Mock every protocol your AI agents use - **[Chaos Testing](https://aimock.copilotkit.dev/chaos-testing)** — 500 errors, malformed JSON, mid-stream disconnects at any probability - **[Drift Detection](https://aimock.copilotkit.dev/drift-detection)** — Daily CI validation against real APIs diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml index 9fa1f59..1d2e733 100644 --- a/charts/aimock/Chart.yaml +++ b/charts/aimock/Chart.yaml @@ -3,4 +3,4 @@ name: aimock description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector) type: application version: 0.1.0 -appVersion: "1.11.0" +appVersion: "1.12.0" diff --git a/docs/fixtures/index.html b/docs/fixtures/index.html index 208611c..b023802 100644 --- a/docs/fixtures/index.html +++ b/docs/fixtures/index.html @@ -162,6 +162,26 @@

Response Types

embedding[] Vector of numbers + + Image + image.url or images[].url + Generated image URL(s) or base64 data + + + Speech + audio + Base64-encoded audio data + + + Transcription + transcription.text, words?, segments? + Transcribed text with optional timestamps + + + Video + video.url, video.duration? + Generated video URL with async polling + @@ -239,6 +259,10 @@

Programmatically

mock.onMessage("hello", { content: "Hi!" }); mock.onToolCall("get_weather", { content: "72F" }); mock.onEmbedding("my text", { embedding: [0.1, 0.2] }); +mock.onImage("sunset", { image: { url: "https://example.com/sunset.png" } }); +mock.onSpeech("hello", { audio: "SGVsbG8=" }); +mock.onTranscription("audio.mp3", { transcription: { text: "Hello" } }); +mock.onVideo("cats", { video: { url: "https://example.com/cats.mp4" } }); mock.onJsonOutput("data", { key: "value" }); mock.onToolResult("call_123", { content: "Done" }); diff --git a/docs/images/index.html b/docs/images/index.html new file mode 100644 index 0000000..8aaf28e --- /dev/null +++ b/docs/images/index.html @@ -0,0 +1,286 @@ + + + + + + Image Generation — aimock + + + + + + + + + +
+ + +
+

Image Generation

+

+ The image generation endpoints support both OpenAI + POST /v1/images/generations and Gemini Imagen + POST /v1beta/models/{model}:predict formats. Return single or multiple images + as URLs or base64-encoded data. +

+ +

Endpoints

+ + + + + + + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/images/generationsJSON (OpenAI)
POST/v1beta/models/{model}:predictJSON (Gemini Imagen)
+ +

Unit Test: Single Image URL

+

+ Using the programmatic API with vitest, register a fixture and assert on the response. +

+ +
+
image-url.test.ts ts
+
import { LLMock } from "@copilotkit/aimock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("returns a single image URL", async () => {
+  mock.onImage("a sunset over mountains", {
+    image: { url: "https://example.com/sunset.png" },
+  });
+
+  const res = await fetch(`${mock.url}/v1/images/generations`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "dall-e-3",
+      prompt: "a sunset over mountains",
+      n: 1,
+      size: "1024x1024",
+    }),
+  });
+
+  const body = await res.json();
+  expect(body.data[0].url).toBe("https://example.com/sunset.png");
+});
+
+ +

Unit Test: Multiple Images

+ +
+
+ image-multiple.test.ts ts +
+
it("returns multiple images", async () => {
+  mock.onImage("cats", {
+    images: [
+      { url: "https://example.com/cat1.png" },
+      { url: "https://example.com/cat2.png" },
+    ],
+  });
+
+  const res = await fetch(`${mock.url}/v1/images/generations`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "dall-e-3",
+      prompt: "cats playing",
+      n: 2,
+    }),
+  });
+
+  const body = await res.json();
+  expect(body.data).toHaveLength(2);
+  expect(body.data[0].url).toBe("https://example.com/cat1.png");
+  expect(body.data[1].url).toBe("https://example.com/cat2.png");
+});
+
+ +

Unit Test: Base64 Response

+ +
+
image-base64.test.ts ts
+
it("returns base64-encoded image", async () => {
+  mock.onImage("logo", {
+    image: { b64Json: "iVBORw0KGgoAAAANSUhEUg..." },
+  });
+
+  const res = await fetch(`${mock.url}/v1/images/generations`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "dall-e-3",
+      prompt: "a company logo",
+      response_format: "b64_json",
+    }),
+  });
+
+  const body = await res.json();
+  expect(body.data[0].b64_json).toBeDefined();
+});
+
+ +

Unit Test: Gemini Imagen Format

+ +
+
image-gemini.test.ts ts
+
it("handles Gemini Imagen predict endpoint", async () => {
+  mock.onImage("landscape", {
+    image: { url: "https://example.com/landscape.png" },
+  });
+
+  const res = await fetch(
+    `${mock.url}/v1beta/models/imagen-3.0-generate-002:predict`,
+    {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({
+        instances: [{ prompt: "a beautiful landscape" }],
+        parameters: { sampleCount: 1 },
+      }),
+    }
+  );
+
+  const body = await res.json();
+  expect(body.predictions).toBeDefined();
+});
+
+ +

JSON Fixture

+ +
+
+ fixtures/images.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "sunset" },
+      "response": {
+        "image": { "url": "https://example.com/sunset.png" }
+      }
+    },
+    {
+      "match": { "userMessage": "cats" },
+      "response": {
+        "images": [
+          { "url": "https://example.com/cat1.png" },
+          { "url": "https://example.com/cat2.png" }
+        ]
+      }
+    }
+  ]
+}
+
+ +

Response Format

+

Matches the OpenAI /v1/images/generations response format:

+ + +
+

+ Image fixtures use match.userMessage which maps to the + prompt field in the request body. The prompt matcher checks + for substring matches. +

+
+ +

Record & Replay

+

+ When no fixture matches an incoming request, aimock can proxy it to the real API and + record the response as a fixture for future replays. Enable recording with the + --record flag or via RecordConfig in the programmatic API. + Recorded image fixtures capture the url or b64_json from the + provider response and save them to disk, so subsequent runs replay instantly without + hitting the real API. +

+ +
+
CLI sh
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+ +
+ + + + + diff --git a/docs/index.html b/docs/index.html index 75d3c5f..406061d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1405,8 +1405,16 @@

Chaos Testing

-
+
🎨
+

Multimedia APIs

+

+ Image generation, text-to-speech, audio transcription, and video generation — + mock every multimedia endpoint with fixtures. +

+
+ +
📊

Drift Detection

Fixtures stay accurate as providers evolve. Fixes ship before your tests break.

@@ -1539,6 +1547,38 @@

How aimock compares

+ + Image generation + Built-in ✓ + + + + + + + Text-to-Speech + Built-in ✓ + + + + + + + Audio transcription + Built-in ✓ + + + + + + + Video generation + Built-in ✓ + + + + + Structured output / JSON mode Built-in ✓ diff --git a/docs/sidebar.js b/docs/sidebar.js index 5025839..3159755 100644 --- a/docs/sidebar.js +++ b/docs/sidebar.js @@ -26,6 +26,15 @@ { label: "Compatible Providers", href: "/compatible-providers" }, ], }, + { + title: "Multimedia", + links: [ + { label: "Image Generation", href: "/images" }, + { label: "Text-to-Speech", href: "/speech" }, + { label: "Audio Transcription", href: "/transcription" }, + { label: "Video Generation", href: "/video" }, + ], + }, { title: "LLM Features", links: [ diff --git a/docs/speech/index.html b/docs/speech/index.html new file mode 100644 index 0000000..10d389c --- /dev/null +++ b/docs/speech/index.html @@ -0,0 +1,225 @@ + + + + + + Text-to-Speech — aimock + + + + + + + + + +
+ + +
+

Text-to-Speech

+

+ The POST /v1/audio/speech endpoint returns audio data from text input. + Supports multiple output formats including mp3, opus, aac, flac, wav, and pcm. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/audio/speechJSON request, binary/base64 response
+ +

Unit Test: Basic Speech

+

+ Using the programmatic API with vitest, register a fixture and assert on the response. +

+ +
+
speech-basic.test.ts ts
+
import { LLMock } from "@copilotkit/aimock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("returns audio for text input", async () => {
+  mock.onSpeech("Hello world", { audio: "SGVsbG8gd29ybGQ=" });
+
+  const res = await fetch(`${mock.url}/v1/audio/speech`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "tts-1",
+      input: "Hello world",
+      voice: "alloy",
+    }),
+  });
+
+  expect(res.ok).toBe(true);
+  const audio = Buffer.from(await res.arrayBuffer());
+  expect(audio.toString("base64")).toBe("SGVsbG8gd29ybGQ=");
+});
+
+ +

Format Options

+

+ The response_format field in the request controls the audio output format. + Supported values: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FormatContent-TypeDescription
mp3audio/mpegDefault format, widely supported
opusaudio/opusLow latency, good for streaming
aacaudio/aacPreferred for mobile devices
flacaudio/flacLossless compression
wavaudio/wavUncompressed, no decoding overhead
pcmaudio/pcmRaw samples, 24kHz 16-bit signed little-endian
+ +

JSON Fixture

+ +
+
+ fixtures/speech.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "Hello world" },
+      "response": {
+        "audio": "SGVsbG8gd29ybGQ="
+      }
+    }
+  ]
+}
+
+ +

Response Format

+

Returns audio data matching the requested format:

+
    +
  • audio — base64-encoded audio data in the fixture response
  • +
+ +
+

+ Speech fixtures use match.userMessage which maps to the + input field in the request body. The matcher checks for substring matches + on the text to be spoken. +

+
+ +

Record & Replay

+

+ When no fixture matches an incoming request, aimock can proxy it to the real API and + record the response as a fixture for future replays. Enable recording with the + --record flag or via RecordConfig in the programmatic API. + Binary audio from the provider is base64-encoded in the recorded fixture, with the format + derived from the response Content-Type header (e.g. + audio/mpeg for mp3). Subsequent requests replay the cached audio without + hitting the real API. +

+ +
+
CLI sh
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+ +
+ + + + + diff --git a/docs/transcription/index.html b/docs/transcription/index.html new file mode 100644 index 0000000..0eb0653 --- /dev/null +++ b/docs/transcription/index.html @@ -0,0 +1,242 @@ + + + + + + Audio Transcription — aimock + + + + + + + + + +
+ + +
+

Audio Transcription

+

+ The POST /v1/audio/transcriptions endpoint accepts multipart form-data audio + uploads and returns transcribed text. Supports both simple and verbose response formats + with word-level timestamps and segments. +

+ +

Endpoint

+ + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/audio/transcriptionsMultipart form-data request, JSON response
+ +

Unit Test: Simple Transcription

+

+ Using the programmatic API with vitest, register a fixture and assert on the response. +

+ +
+
+ transcription-simple.test.ts ts +
+
import { LLMock } from "@copilotkit/aimock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("returns simple transcription text", async () => {
+  mock.onTranscription("audio.mp3", {
+    transcription: { text: "Hello, how are you today?" },
+  });
+
+  const form = new FormData();
+  form.append("file", new Blob(["fake-audio"]), "audio.mp3");
+  form.append("model", "whisper-1");
+
+  const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+    method: "POST",
+    body: form,
+  });
+
+  const body = await res.json();
+  expect(body.text).toBe("Hello, how are you today?");
+});
+
+ +

Unit Test: Verbose Response with Segments

+ +
+
+ transcription-verbose.test.ts ts +
+
it("returns verbose transcription with words and segments", async () => {
+  mock.onTranscription("meeting.wav", {
+    transcription: {
+      text: "Welcome to the meeting.",
+      words: [
+        { word: "Welcome", start: 0.0, end: 0.5 },
+        { word: "to", start: 0.5, end: 0.7 },
+        { word: "the", start: 0.7, end: 0.9 },
+        { word: "meeting", start: 0.9, end: 1.4 },
+      ],
+      segments: [
+        { id: 0, text: "Welcome to the meeting.", start: 0.0, end: 1.4 },
+      ],
+    },
+  });
+
+  const form = new FormData();
+  form.append("file", new Blob(["fake-audio"]), "meeting.wav");
+  form.append("model", "whisper-1");
+  form.append("response_format", "verbose_json");
+  form.append("timestamp_granularities[]", "word");
+  form.append("timestamp_granularities[]", "segment");
+
+  const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+    method: "POST",
+    body: form,
+  });
+
+  const body = await res.json();
+  expect(body.text).toBe("Welcome to the meeting.");
+  expect(body.words).toHaveLength(4);
+  expect(body.segments).toHaveLength(1);
+});
+
+ +

JSON Fixture

+ +
+
+ fixtures/transcription.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "audio.mp3" },
+      "response": {
+        "transcription": {
+          "text": "Hello, how are you today?"
+        }
+      }
+    }
+  ]
+}
+
+ +

Response Format

+

Matches the OpenAI /v1/audio/transcriptions response format:

+ +

Simple (default)

+
    +
  • text — the transcribed text
  • +
+ +

Verbose (response_format: "verbose_json")

+
    +
  • text — the full transcribed text
  • +
  • task"transcribe"
  • +
  • language — detected language code
  • +
  • duration — audio duration in seconds
  • +
  • + words[] — word-level timestamps with word, + start, end +
  • +
  • + segments[] — segment-level data with id, + text, start, end +
  • +
+ +
+

+ Transcription requests use multipart form-data. The fixture + match.userMessage maps to the uploaded filename. This allows matching + different fixtures based on which audio file is submitted. +

+
+ +

Record & Replay

+

+ When no fixture matches an incoming request, aimock can proxy it to the real API and + record the response as a fixture for future replays. Enable recording with the + --record flag or via RecordConfig in the programmatic API. + Recorded transcription fixtures preserve the full response including text, + language, duration, words, and + segments, so verbose-mode responses replay with complete word-level + timestamps intact. +

+ +
+
CLI sh
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+ +
+ + + + + diff --git a/docs/video/index.html b/docs/video/index.html new file mode 100644 index 0000000..c6f9fb6 --- /dev/null +++ b/docs/video/index.html @@ -0,0 +1,221 @@ + + + + + + Video Generation — aimock + + + + + + + + + +
+ + +
+

Video Generation

+

+ The video generation endpoints support async creation via + POST /v1/videos and status polling via GET /v1/videos/{id}. Mock + the full async polling lifecycle with deterministic responses. +

+ +

Endpoints

+ + + + + + + + + + + + + + + + + + + + +
MethodPathFormat
POST/v1/videosJSON (create video job)
GET/v1/videos/{id}JSON (poll status)
+ +

Async Polling Pattern

+

+ Video generation is asynchronous. The POST endpoint returns a job ID, and the + GET endpoint returns the current status. aimock simulates this by returning + "processing" from the create call and "completed" with the video + URL when the job is polled. +

+ +

Unit Test: Create and Poll

+

+ Using the programmatic API with vitest, register a fixture and test the full async flow. +

+ +
+
+ video-polling.test.ts ts +
+
import { LLMock } from "@copilotkit/aimock";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+let mock: LLMock;
+
+beforeAll(async () => {
+  mock = new LLMock();
+  await mock.start();
+});
+
+afterAll(async () => {
+  await mock.stop();
+});
+
+it("creates a video job and polls for completion", async () => {
+  mock.onVideo("a cat playing piano", {
+    video: { url: "https://example.com/cat-piano.mp4", duration: 10 },
+  });
+
+  // Step 1: Create the video job
+  const createRes = await fetch(`${mock.url}/v1/videos`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: "sora",
+      prompt: "a cat playing piano",
+      duration: 10,
+    }),
+  });
+
+  const createBody = await createRes.json();
+  expect(createBody.id).toBeDefined();
+  expect(createBody.status).toBe("processing");
+
+  // Step 2: Poll for completion
+  const pollRes = await fetch(`${mock.url}/v1/videos/${createBody.id}`);
+  const pollBody = await pollRes.json();
+
+  expect(pollBody.status).toBe("completed");
+  expect(pollBody.video.url).toBe("https://example.com/cat-piano.mp4");
+  expect(pollBody.video.duration).toBe(10);
+});
+
+ +

JSON Fixture

+ +
+
+ fixtures/video.json json +
+
{
+  "fixtures": [
+    {
+      "match": { "userMessage": "cat playing piano" },
+      "response": {
+        "video": {
+          "url": "https://example.com/cat-piano.mp4",
+          "duration": 10
+        }
+      }
+    }
+  ]
+}
+
+ +

Response Format

+ +

Create (POST /v1/videos)

+
    +
  • id — unique job identifier
  • +
  • status"processing" initially
  • +
  • created — Unix timestamp
  • +
+ +

Poll (GET /v1/videos/{id})

+
    +
  • id — the job identifier
  • +
  • + status"processing" or + "completed" +
  • +
  • video.url — URL of the generated video (when completed)
  • +
  • video.duration — video duration in seconds
  • +
+ +
+

+ Video fixtures use match.userMessage which maps to the + prompt field in the creation request. The async polling pattern is handled + automatically by aimock. +

+
+ +

Record & Replay

+

+ When no fixture matches an incoming request, aimock can proxy it to the real API and + record the response as a fixture for future replays. Enable recording with the + --record flag or via RecordConfig in the programmatic API. + Completed videos are recorded with their final URL; in-progress responses are also saved + so that the async polling lifecycle can be simulated on replay without hitting the real + API. +

+ +
+
CLI sh
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+ +
+ + + + + diff --git a/package.json b/package.json index 76331bb..6ff5c19 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@copilotkit/aimock", - "version": "1.11.0", + "version": "1.12.0", "description": "Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, and more. Zero dependencies.", "license": "MIT", "repository": { diff --git a/scripts/update-competitive-matrix.ts b/scripts/update-competitive-matrix.ts index 2c20fb4..43e852e 100644 --- a/scripts/update-competitive-matrix.ts +++ b/scripts/update-competitive-matrix.ts @@ -72,6 +72,22 @@ const FEATURE_RULES: FeatureRule[] = [ rowLabel: "Embeddings API", keywords: ["embedding", "/v1/embeddings", "embed"], }, + { + rowLabel: "Image generation", + keywords: ["image", "dall-e", "dalle", "/v1/images", "image generation", "imagen"], + }, + { + rowLabel: "Text-to-Speech", + keywords: ["tts", "text-to-speech", "speech", "/v1/audio/speech", "audio generation"], + }, + { + rowLabel: "Audio transcription", + keywords: ["transcription", "whisper", "/v1/audio/transcriptions", "speech-to-text", "stt"], + }, + { + rowLabel: "Video generation", + keywords: ["video", "sora", "/v1/videos", "video generation"], + }, { rowLabel: "Structured output / JSON mode", keywords: ["json_object", "json_schema", "structured output", "response_format"], diff --git a/src/__tests__/multimedia-record.test.ts b/src/__tests__/multimedia-record.test.ts new file mode 100644 index 0000000..9f28970 --- /dev/null +++ b/src/__tests__/multimedia-record.test.ts @@ -0,0 +1,508 @@ +import { describe, it, expect } from "vitest"; + +/** + * Unit tests for multimedia record/replay support in the recorder module. + * + * These test the internal detection logic by calling buildFixtureResponse + * and buildFixtureMatch indirectly through proxyAndRecord integration, + * as well as directly importing where possible. 
+ * + * Since buildFixtureResponse and buildFixtureMatch are not exported, + * we test them via a lightweight upstream mock that returns the expected + * shapes, verifying the recorder produces correct fixture responses. + */ + +import * as http from "node:http"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { proxyAndRecord } from "../recorder.js"; +import type { Fixture, RecordConfig, ChatCompletionRequest } from "../types.js"; +import { Logger } from "../logger.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function createUpstream( + handler: (req: http.IncomingMessage, res: http.ServerResponse) => void, +): Promise<{ server: http.Server; url: string }> { + return new Promise((resolve) => { + const server = http.createServer(handler); + server.listen(0, "127.0.0.1", () => { + const addr = server.address() as { port: number }; + resolve({ server, url: `http://127.0.0.1:${addr.port}` }); + }); + }); +} + +function closeServer(server: http.Server): Promise { + return new Promise((resolve) => server.close(() => resolve())); +} + +function createMockReqRes( + urlPath: string, + headers: Record = {}, +): { req: http.IncomingMessage; res: http.ServerResponse; getResponse: () => Promise } { + const chunks: Buffer[] = []; + let statusCode = 200; + + const req = { + method: "POST", + url: urlPath, + headers: { "content-type": "application/json", ...headers }, + } as unknown as http.IncomingMessage; + + const res = { + statusCode, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + writeHead(status: number, hdrs?: Record) { + statusCode = status; + res.statusCode = status; + }, + end(data?: string | Buffer) { + if (data) chunks.push(Buffer.isBuffer(data) ? 
data : Buffer.from(data)); + }, + setHeader() {}, + } as unknown as http.ServerResponse; + + return { + req, + res, + getResponse: async () => Buffer.concat(chunks).toString(), + }; +} + +function makeTmpDir(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), "aimock-mm-record-")); +} + +// --------------------------------------------------------------------------- +// Tests: buildFixtureResponse detection via proxyAndRecord +// --------------------------------------------------------------------------- + +describe("multimedia record: image response detection", () => { + it("detects OpenAI image generation response and saves image fixture", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + created: 1234567890, + data: [{ url: "https://example.com/img.png", revised_prompt: "a pretty sunset" }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "sunset" }], + _endpointType: "image", + }; + + const { req, res } = createMockReqRes("/v1/images/generations"); + const proxied = await proxyAndRecord( + req, + res, + request, + "openai", + "/v1/images/generations", + fixtures, + { record, logger }, + ); + + expect(proxied).toBe(true); + expect(fixtures).toHaveLength(1); + const fixture = fixtures[0]; + expect(fixture.match.endpoint).toBe("image"); + expect(fixture.match.userMessage).toBe("sunset"); + + const response = fixture.response as { image?: { url?: string; revisedPrompt?: string } }; + expect(response.image).toBeDefined(); + expect(response.image!.url).toBe("https://example.com/img.png"); + expect(response.image!.revisedPrompt).toBe("a pretty sunset"); + } finally { + await 
closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); + + it("detects multi-image response", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + created: 1234567890, + data: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "cats" }], + _endpointType: "image", + }; + + const { req, res } = createMockReqRes("/v1/images/generations"); + await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, { + record, + logger, + }); + + const response = fixtures[0].response as { images?: Array<{ url?: string }> }; + expect(response.images).toHaveLength(2); + expect(response.images![0].url).toBe("https://example.com/1.png"); + expect(response.images![1].url).toBe("https://example.com/2.png"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); + + it("detects Gemini Imagen response", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + predictions: [{ bytesBase64Encoded: "iVBORw0KGgo=", mimeType: "image/png" }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "imagen", + messages: [{ role: "user", content: "dog" }], + _endpointType: "image", + }; + + const { req, res } = 
createMockReqRes("/v1beta/models/imagen:predict"); + await proxyAndRecord(req, res, request, "openai", "/v1beta/models/imagen:predict", fixtures, { + record, + logger, + }); + + const response = fixtures[0].response as { image?: { b64Json?: string } }; + expect(response.image).toBeDefined(); + expect(response.image!.b64Json).toBe("iVBORw0KGgo="); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); +}); + +describe("multimedia record: transcription response detection", () => { + it("detects OpenAI transcription response", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + task: "transcribe", + language: "english", + duration: 5.2, + text: "Hello world", + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "whisper-1", + messages: [], + _endpointType: "transcription", + }; + + const { req, res } = createMockReqRes("/v1/audio/transcriptions"); + await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, { + record, + logger, + }); + + expect(fixtures).toHaveLength(1); + const response = fixtures[0].response as { + transcription?: { text: string; language?: string; duration?: number }; + }; + expect(response.transcription).toBeDefined(); + expect(response.transcription!.text).toBe("Hello world"); + expect(response.transcription!.language).toBe("english"); + expect(response.transcription!.duration).toBe(5.2); + expect(fixtures[0].match.endpoint).toBe("transcription"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); + + it("detects transcription with words and segments", async () => { + const 
fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + task: "transcribe", + language: "english", + duration: 2.0, + text: "Hi", + words: [{ word: "Hi", start: 0, end: 0.5 }], + segments: [{ id: 0, text: "Hi", start: 0, end: 2.0 }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "whisper-1", + messages: [], + _endpointType: "transcription", + }; + + const { req, res } = createMockReqRes("/v1/audio/transcriptions"); + await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, { + record, + logger, + }); + + const response = fixtures[0].response as { + transcription?: { text: string; words?: unknown[]; segments?: unknown[] }; + }; + expect(response.transcription!.words).toHaveLength(1); + expect(response.transcription!.segments).toHaveLength(1); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); +}); + +describe("multimedia record: video response detection", () => { + it("detects completed video response", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "vid_abc", + status: "completed", + url: "https://example.com/video.mp4", + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "sora-2", + messages: [{ role: "user", content: "dancing cat" }], + _endpointType: "video", + }; + + const { req, res } = createMockReqRes("/v1/videos"); + await 
proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger }); + + expect(fixtures).toHaveLength(1); + const response = fixtures[0].response as { + video?: { id: string; status: string; url?: string }; + }; + expect(response.video).toBeDefined(); + expect(response.video!.id).toBe("vid_abc"); + expect(response.video!.status).toBe("completed"); + expect(response.video!.url).toBe("https://example.com/video.mp4"); + expect(fixtures[0].match.endpoint).toBe("video"); + expect(fixtures[0].match.userMessage).toBe("dancing cat"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); + + it("detects in-progress video response", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ id: "vid_456", status: "in_progress" })); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "sora-2", + messages: [{ role: "user", content: "slow motion" }], + _endpointType: "video", + }; + + const { req, res } = createMockReqRes("/v1/videos"); + await proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger }); + + const response = fixtures[0].response as { + video?: { id: string; status: string }; + }; + expect(response.video!.id).toBe("vid_456"); + expect(response.video!.status).toBe("processing"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); +}); + +describe("multimedia record: TTS audio response detection", () => { + it("detects binary audio response and saves as base64", async () => { + const fixturePath = makeTmpDir(); + const audioBytes = Buffer.from("fake-audio-content"); + const { server, url } = 
await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "audio/mpeg" }); + res.end(audioBytes); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "tts-1", + messages: [{ role: "user", content: "hello world" }], + _endpointType: "speech", + }; + + const { req, res } = createMockReqRes("/v1/audio/speech"); + await proxyAndRecord(req, res, request, "openai", "/v1/audio/speech", fixtures, { + record, + logger, + }); + + expect(fixtures).toHaveLength(1); + const response = fixtures[0].response as { audio?: string }; + expect(response.audio).toBe(audioBytes.toString("base64")); + expect(fixtures[0].match.endpoint).toBe("speech"); + expect(fixtures[0].match.userMessage).toBe("hello world"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); +}); + +describe("multimedia record: buildFixtureMatch endpoint inclusion", () => { + it("includes endpoint for image requests", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ created: 1, data: [{ url: "x.png" }] })); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "test" }], + _endpointType: "image", + }; + + const { req, res } = createMockReqRes("/v1/images/generations"); + await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, { + record, + logger, + }); + + expect(fixtures[0].match.endpoint).toBe("image"); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: 
true, force: true }); + } + }); + + it("does not include endpoint for chat requests", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "gpt-4o", + messages: [{ role: "user", content: "hello" }], + _endpointType: "chat", + }; + + const { req, res } = createMockReqRes("/v1/chat/completions"); + await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, { + record, + logger, + }); + + expect(fixtures[0].match.endpoint).toBeUndefined(); + } finally { + await closeServer(server); + fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); + + it("does not include endpoint when _endpointType is absent", async () => { + const fixturePath = makeTmpDir(); + const { server, url } = await createUpstream((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }], + }), + ); + }); + + try { + const fixtures: Fixture[] = []; + const record: RecordConfig = { providers: { openai: url }, fixturePath }; + const logger = new Logger("silent"); + const request: ChatCompletionRequest = { + model: "gpt-4o", + messages: [{ role: "user", content: "hello" }], + }; + + const { req, res } = createMockReqRes("/v1/chat/completions"); + await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, { + record, + logger, + }); + + expect(fixtures[0].match.endpoint).toBeUndefined(); + } finally { + await closeServer(server); + 
fs.rmSync(fixturePath, { recursive: true, force: true }); + } + }); +}); diff --git a/src/__tests__/multimedia-types.test.ts b/src/__tests__/multimedia-types.test.ts new file mode 100644 index 0000000..1217ba2 --- /dev/null +++ b/src/__tests__/multimedia-types.test.ts @@ -0,0 +1,130 @@ +import { describe, test, expect } from "vitest"; +import { + isImageResponse, + isAudioResponse, + isTranscriptionResponse, + isVideoResponse, +} from "../helpers.js"; +import { matchFixture } from "../router.js"; +import type { Fixture, ChatCompletionRequest, FixtureResponse } from "../types.js"; + +describe("multimedia type guards", () => { + test("isImageResponse detects single image", () => { + const r: FixtureResponse = { image: { url: "https://example.com/img.png" } }; + expect(isImageResponse(r)).toBe(true); + }); + + test("isImageResponse detects multiple images", () => { + const r: FixtureResponse = { + images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }], + }; + expect(isImageResponse(r)).toBe(true); + }); + + test("isImageResponse rejects text response", () => { + const r: FixtureResponse = { content: "hello" }; + expect(isImageResponse(r)).toBe(false); + }); + + test("isAudioResponse detects audio", () => { + const r: FixtureResponse = { audio: "AAAA", format: "mp3" }; + expect(isAudioResponse(r)).toBe(true); + }); + + test("isAudioResponse rejects text response", () => { + const r: FixtureResponse = { content: "hello" }; + expect(isAudioResponse(r)).toBe(false); + }); + + test("isTranscriptionResponse detects transcription", () => { + const r: FixtureResponse = { transcription: { text: "hello" } }; + expect(isTranscriptionResponse(r)).toBe(true); + }); + + test("isTranscriptionResponse rejects text response", () => { + const r: FixtureResponse = { content: "hello" }; + expect(isTranscriptionResponse(r)).toBe(false); + }); + + test("isVideoResponse detects video", () => { + const r: FixtureResponse = { + video: { id: "v1", status: 
"completed", url: "https://example.com/v.mp4" }, + }; + expect(isVideoResponse(r)).toBe(true); + }); + + test("isVideoResponse rejects text response", () => { + const r: FixtureResponse = { content: "hello" }; + expect(isVideoResponse(r)).toBe(false); + }); +}); + +describe("endpoint filtering in matchFixture", () => { + test("fixture with endpoint: image only matches image requests", () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "guitar", endpoint: "image" }, + response: { image: { url: "img.png" } }, + }, + ]; + const chatReq: ChatCompletionRequest = { + model: "gpt-4", + messages: [{ role: "user", content: "guitar" }], + _endpointType: "chat", + }; + expect(matchFixture(fixtures, chatReq)).toBeNull(); + + const imageReq: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "guitar" }], + _endpointType: "image", + }; + expect(matchFixture(fixtures, imageReq)).toBe(fixtures[0]); + }); + + test("fixture without endpoint matches chat/embedding requests but not multimedia", () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "guitar" }, + response: { content: "Chat about guitars" }, + }, + ]; + // Chat requests match generic fixtures + const chatReq: ChatCompletionRequest = { + model: "gpt-4", + messages: [{ role: "user", content: "guitar" }], + _endpointType: "chat", + }; + expect(matchFixture(fixtures, chatReq)).toBe(fixtures[0]); + + // Image requests do NOT match generic chat fixtures (prevents 500s) + const imageReq: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "guitar" }], + _endpointType: "image", + }; + expect(matchFixture(fixtures, imageReq)).toBeNull(); + }); + + test("endpoint filtering works with sequenceIndex", () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "g", endpoint: "image", sequenceIndex: 0 }, + response: { image: { url: "1.png" } }, + }, + { + match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 }, + 
response: { image: { url: "2.png" } }, + }, + ]; + const counts = new Map(); + const imageReq: ChatCompletionRequest = { + model: "dall-e-3", + messages: [{ role: "user", content: "g" }], + _endpointType: "image", + }; + + const first = matchFixture(fixtures, imageReq, counts); + expect(first).toBe(fixtures[0]); + }); +}); diff --git a/src/__tests__/multimedia.test.ts b/src/__tests__/multimedia.test.ts new file mode 100644 index 0000000..68a1265 --- /dev/null +++ b/src/__tests__/multimedia.test.ts @@ -0,0 +1,447 @@ +import { describe, test, expect } from "vitest"; +import { LLMock } from "../llmock.js"; + +describe("image generation", () => { + test("image generation returns fixture (OpenAI format)", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "a guitar", endpoint: "image" }, + response: { + image: { url: "https://example.com/guitar.png", revisedPrompt: "a guitar on display" }, + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "dall-e-3", prompt: "a guitar", n: 1 }), + }); + expect(res.status).toBe(200); + const data = await res.json(); + expect(data.data[0].url).toBe("https://example.com/guitar.png"); + expect(data.data[0].revised_prompt).toBe("a guitar on display"); + expect(typeof data.created).toBe("number"); + await mock.stop(); + }); + + test("multiple images", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "guitars", endpoint: "image" }, + response: { + images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }], + }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "dall-e-3", 
prompt: "guitars", n: 2 }), + }); + const data = await res.json(); + expect(data.data).toHaveLength(2); + expect(data.data[0].url).toBe("https://example.com/1.png"); + expect(data.data[1].url).toBe("https://example.com/2.png"); + await mock.stop(); + }); + + test("base64 image response", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "a cat", endpoint: "image" }, + response: { image: { b64Json: "iVBORw0KGgo=" } }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "dall-e-3", prompt: "a cat", response_format: "b64_json" }), + }); + const data = await res.json(); + expect(data.data[0].b64_json).toBe("iVBORw0KGgo="); + await mock.stop(); + }); + + test("Gemini Imagen endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "a guitar", endpoint: "image" }, + response: { image: { b64Json: "iVBORw0KGgo=" } }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1beta/models/imagen-3.0-generate-002:predict`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ instances: [{ prompt: "a guitar" }], parameters: { sampleCount: 1 } }), + }); + expect(res.status).toBe(200); + const data = await res.json(); + expect(data.predictions[0].bytesBase64Encoded).toBe("iVBORw0KGgo="); + expect(data.predictions[0].mimeType).toBe("image/png"); + await mock.stop(); + }); +}); + +describe("audio transcription", () => { + test("transcription returns text", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { endpoint: "transcription" }, + response: { transcription: { text: "Welcome", language: "english", duration: 2.5 } }, + }); + await mock.start(); + + const formData = new FormData(); + formData.append("file", new Blob(["fake 
audio"], { type: "audio/wav" }), "test.wav"); + formData.append("model", "whisper-1"); + + const res = await fetch(`${mock.url}/v1/audio/transcriptions`, { + method: "POST", + headers: { Authorization: "Bearer test" }, + body: formData, + }); + expect(res.status).toBe(200); + const data = await res.json(); + expect(data.text).toBe("Welcome"); + await mock.stop(); + }); + + test("verbose transcription includes words and segments", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { endpoint: "transcription" }, + response: { + transcription: { + text: "Welcome", + language: "english", + duration: 2.5, + words: [{ word: "Welcome", start: 0.0, end: 0.5 }], + segments: [{ id: 0, text: "Welcome", start: 0.0, end: 2.5 }], + }, + }, + }); + await mock.start(); + + const formData = new FormData(); + formData.append("file", new Blob(["fake audio"]), "test.wav"); + formData.append("model", "whisper-1"); + formData.append("response_format", "verbose_json"); + + const res = await fetch(`${mock.url}/v1/audio/transcriptions`, { + method: "POST", + headers: { Authorization: "Bearer test" }, + body: formData, + }); + const data = await res.json(); + expect(data.task).toBe("transcribe"); + expect(data.words).toHaveLength(1); + expect(data.segments).toHaveLength(1); + await mock.stop(); + }); +}); + +describe("video generation", () => { + test("video creation and status check", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "a guitar", endpoint: "video" }, + response: { + video: { id: "vid_123", status: "completed", url: "https://example.com/video.mp4" }, + }, + }); + await mock.start(); + + // Create + const create = await fetch(`${mock.url}/v1/videos`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "sora-2", prompt: "a guitar" }), + }); + const job = await create.json(); + expect(job.id).toBe("vid_123"); + 
expect(job.status).toBe("completed"); + + // Status check + const status = await fetch(`${mock.url}/v1/videos/vid_123`, { + headers: { Authorization: "Bearer test" }, + }); + const result = await status.json(); + expect(result.status).toBe("completed"); + expect(result.url).toBe("https://example.com/video.mp4"); + await mock.stop(); + }); + + test("video processing returns minimal response then status on GET", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "slow motion", endpoint: "video" }, + response: { + video: { id: "vid_456", status: "processing", url: "https://example.com/slow.mp4" }, + }, + }); + await mock.start(); + + const create = await fetch(`${mock.url}/v1/videos`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "sora-2", prompt: "slow motion" }), + }); + const job = await create.json(); + expect(job.id).toBe("vid_456"); + expect(job.status).toBe("processing"); + expect(job.url).toBeUndefined(); + + const status = await fetch(`${mock.url}/v1/videos/vid_456`, { + headers: { Authorization: "Bearer test" }, + }); + const result = await status.json(); + expect(result.id).toBe("vid_456"); + expect(result.status).toBe("processing"); + await mock.stop(); + }); + + test("video status 404 for unknown id", async () => { + const mock = new LLMock({ port: 0 }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/videos/unknown`, { + headers: { Authorization: "Bearer test" }, + }); + expect(res.status).toBe(404); + await mock.stop(); + }); +}); + +describe("convenience methods", () => { + test("onImage creates fixture with correct endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.onImage("sunset", { image: { url: "sunset.png" } }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { "Content-Type": "application/json", 
Authorization: "Bearer t" }, + body: JSON.stringify({ prompt: "sunset" }), + }); + expect((await res.json()).data[0].url).toBe("sunset.png"); + await mock.stop(); + }); + + test("onSpeech creates fixture with correct endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.onSpeech("hello", { audio: "AAAA", format: "mp3" }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/audio/speech`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ input: "hello", model: "tts-1", voice: "alloy" }), + }); + expect(res.headers.get("content-type")).toBe("audio/mpeg"); + await mock.stop(); + }); + + test("onTranscription creates fixture with correct endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.onTranscription({ transcription: { text: "hello world" } }); + await mock.start(); + + const formData = new FormData(); + formData.append("file", new Blob(["audio"]), "test.wav"); + formData.append("model", "whisper-1"); + const res = await fetch(`${mock.url}/v1/audio/transcriptions`, { + method: "POST", + headers: { Authorization: "Bearer t" }, + body: formData, + }); + expect((await res.json()).text).toBe("hello world"); + await mock.stop(); + }); + + test("onVideo creates fixture with correct endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.onVideo("dancing", { video: { id: "v1", status: "completed", url: "dance.mp4" } }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/videos`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ prompt: "dancing" }), + }); + expect((await res.json()).id).toBe("v1"); + await mock.stop(); + }); +}); + +describe("X-Test-Id isolation", () => { + test("X-Test-Id works for image endpoint", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "g", endpoint: "image", 
sequenceIndex: 0 }, + response: { image: { url: "1.png" } }, + }); + mock.addFixture({ + match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 }, + response: { image: { url: "2.png" } }, + }); + await mock.start(); + + const req = (testId: string) => + fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer t", + "X-Test-Id": testId, + }, + body: JSON.stringify({ model: "dall-e-3", prompt: "g" }), + }).then((r) => r.json()); + + const [a, b] = await Promise.all([req("A"), req("B")]); + expect(a.data[0].url).toBe("1.png"); + expect(b.data[0].url).toBe("1.png"); // both get sequenceIndex 0 + + await mock.stop(); + }); +}); + +describe("endpoint cross-matching prevention", () => { + test("image fixture does not match chat request", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "guitar", endpoint: "image" }, + response: { image: { url: "img.png" } }, + }); + mock.addFixture({ + match: { userMessage: "guitar" }, + response: { content: "Chat about guitars" }, + }); + await mock.start(); + + // Chat request should NOT match the image fixture + const chat = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "guitar" }], + stream: false, + }), + }); + const chatData = await chat.json(); + expect(chatData.choices[0].message.content).toBe("Chat about guitars"); + + // Image request should match the image fixture + const img = await fetch(`${mock.url}/v1/images/generations`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ model: "dall-e-3", prompt: "guitar" }), + }); + const imgData = await img.json(); + expect(imgData.data[0].url).toBe("img.png"); + + await mock.stop(); + }); +}); + 
+describe("endpoint backfill on existing handlers", () => { + test("fixture with endpoint: chat matches chat completions", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "hello", endpoint: "chat" }, + response: { content: "Hi there" }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ + model: "gpt-4o", + messages: [{ role: "user", content: "hello" }], + stream: false, + }), + }); + const data = await res.json(); + expect(data.choices[0].message.content).toBe("Hi there"); + await mock.stop(); + }); + + test("fixture with endpoint: embedding matches embeddings", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { inputText: "test input", endpoint: "embedding" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer t" }, + body: JSON.stringify({ model: "text-embedding-3-small", input: "test input" }), + }); + const data = await res.json(); + expect(data.data[0].embedding).toEqual([0.1, 0.2, 0.3]); + await mock.stop(); + }); +}); + +describe("text-to-speech", () => { + test("TTS returns audio bytes with correct content-type", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "hello world", endpoint: "speech" }, + response: { audio: "AAAA", format: "mp3" }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/audio/speech`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "tts-1", input: "hello world", voice: "alloy" }), + }); + expect(res.status).toBe(200); + 
expect(res.headers.get("content-type")).toBe("audio/mpeg"); + const buffer = await res.arrayBuffer(); + expect(buffer.byteLength).toBeGreaterThan(0); + await mock.stop(); + }); + + test("TTS respects format for content-type", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "test", endpoint: "speech" }, + response: { audio: "AAAA", format: "opus" }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/audio/speech`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "tts-1", input: "test", voice: "alloy" }), + }); + expect(res.headers.get("content-type")).toBe("audio/opus"); + await mock.stop(); + }); + + test("TTS defaults to mp3 when no format specified", async () => { + const mock = new LLMock({ port: 0 }); + mock.addFixture({ + match: { userMessage: "default", endpoint: "speech" }, + response: { audio: "AAAA" }, + }); + await mock.start(); + + const res = await fetch(`${mock.url}/v1/audio/speech`, { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test" }, + body: JSON.stringify({ model: "tts-1", input: "default", voice: "alloy" }), + }); + expect(res.headers.get("content-type")).toBe("audio/mpeg"); + await mock.stop(); + }); +}); diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 552f85c..9b3469e 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -271,6 +271,7 @@ export async function handleConverse( } const completionReq = converseToCompletionRequest(converseReq, modelId); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( @@ -480,6 +481,7 @@ export async function handleConverseStream( } const completionReq = converseToCompletionRequest(converseReq, modelId); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/bedrock.ts 
b/src/bedrock.ts index 169aee6..fabd86a 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -315,6 +315,7 @@ export async function handleBedrock( // Convert to ChatCompletionRequest for fixture matching const completionReq = bedrockToCompletionRequest(bedrockReq, modelId); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( @@ -671,6 +672,7 @@ export async function handleBedrockStream( } const completionReq = bedrockToCompletionRequest(bedrockReq, modelId); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/cohere.ts b/src/cohere.ts index 842b3ae..1d1dccf 100644 --- a/src/cohere.ts +++ b/src/cohere.ts @@ -465,6 +465,7 @@ export async function handleCohere( // Convert to ChatCompletionRequest for fixture matching const completionReq = cohereToCompletionRequest(cohereReq); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/embeddings.ts b/src/embeddings.ts index 1d80a9b..6d1f947 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -85,6 +85,7 @@ export async function handleEmbeddings( model: embeddingReq.model, messages: [], embeddingInput: combinedInput, + _endpointType: "embedding", }; const testId = getTestId(req); diff --git a/src/gemini.ts b/src/gemini.ts index 0b313dd..3c6529d 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -504,6 +504,7 @@ export async function handleGemini( // Convert to ChatCompletionRequest for fixture matching const completionReq = geminiToCompletionRequest(geminiReq, model, streaming); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/helpers.ts b/src/helpers.ts index dac9160..325ac11 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -7,6 +7,10 @@ import type { ContentWithToolCallsResponse, ErrorResponse, EmbeddingResponse, + ImageResponse, + AudioResponse, + TranscriptionResponse, + 
VideoResponse, SSEChunk, ToolCall, ChatCompletion, @@ -74,6 +78,33 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding); } +export function isImageResponse(r: FixtureResponse): r is ImageResponse { + return ( + ("image" in r && r.image != null) || + ("images" in r && Array.isArray((r as ImageResponse).images)) + ); +} + +export function isAudioResponse(r: FixtureResponse): r is AudioResponse { + return "audio" in r && typeof (r as AudioResponse).audio === "string"; +} + +export function isTranscriptionResponse(r: FixtureResponse): r is TranscriptionResponse { + return ( + "transcription" in r && + (r as TranscriptionResponse).transcription != null && + typeof (r as TranscriptionResponse).transcription === "object" + ); +} + +export function isVideoResponse(r: FixtureResponse): r is VideoResponse { + return ( + "video" in r && + (r as VideoResponse).video != null && + typeof (r as VideoResponse).video === "object" + ); +} + export function buildTextChunks( content: string, model: string, diff --git a/src/images.ts b/src/images.ts new file mode 100644 index 0000000..cb9de09 --- /dev/null +++ b/src/images.ts @@ -0,0 +1,214 @@ +import type * as http from "node:http"; +import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js"; +import { isImageResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import type { Journal } from "./journal.js"; +import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; + +interface OpenAIImageRequest { + model?: string; + prompt: string; + n?: number; + size?: string; + response_format?: "url" | "b64_json"; + [key: string]: unknown; +} + +interface GeminiPredictRequest { + instances: Array<{ prompt: string }>; + parameters?: { sampleCount?: number }; + [key: 
string]: unknown; +} + +function buildSyntheticRequest(model: string, prompt: string): ChatCompletionRequest { + return { + model, + messages: [{ role: "user", content: prompt }], + _endpointType: "image", + }; +} + +export async function handleImages( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: HandlerDefaults, + setCorsHeaders: (res: http.ServerResponse) => void, + format: "openai" | "gemini" = "openai", + geminiModel?: string, +): Promise { + setCorsHeaders(res); + const path = req.url ?? "/v1/images/generations"; + const method = req.method ?? "POST"; + + let model: string; + let prompt: string; + + try { + const body = JSON.parse(raw); + if (format === "gemini") { + const geminiReq = body as GeminiPredictRequest; + prompt = geminiReq.instances?.[0]?.prompt ?? ""; + model = geminiModel ?? "imagen"; + } else { + const openaiReq = body as OpenAIImageRequest; + prompt = openaiReq.prompt ?? ""; + model = openaiReq.model ?? 
"dall-e-3"; + } + } catch { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: null, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" }, + }), + ); + return; + } + + const syntheticReq = buildSyntheticRequest(model, prompt); + const testId = getTestId(req); + const fixture = matchFixture( + fixtures, + syntheticReq, + journal.getFixtureMatchCountsForTest(testId), + defaults.requestTransform, + ); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures, testId); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { method, path, headers: flattenHeaders(req.headers), body: syntheticReq }, + defaults.registry, + defaults.logger, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + syntheticReq, + format === "gemini" ? "gemini" : "openai", + req.url ?? "/v1/images/generations", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" }, + }), + ); + return; + } + + const response = fixture.response; + + if (isErrorResponse(response)) { + const status = response.status ?? 
500; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + if (!isImageResponse(response)) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { message: "Fixture response is not an image type", type: "server_error" }, + }), + ); + return; + } + + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 200, fixture }, + }); + + // Normalize to array of image items + const items = response.images ?? (response.image ? [response.image] : []); + + if (format === "gemini") { + const predictions = items.map((item) => ({ + bytesBase64Encoded: item.b64Json ?? "", + mimeType: "image/png" as const, + })); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ predictions })); + } else { + const data = items.map((item) => { + const entry: Record = {}; + if (item.url) entry.url = item.url; + if (item.b64Json) entry.b64_json = item.b64Json; + if (item.revisedPrompt) entry.revised_prompt = item.revisedPrompt; + return entry; + }); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ created: Math.floor(Date.now() / 1000), data })); + } +} diff --git a/src/index.ts b/src/index.ts index a5e9b29..4cb0cfb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -69,6 +69,13 @@ export { handleWebSocketResponses } from "./ws-responses.js"; export { handleWebSocketRealtime } from "./ws-realtime.js"; export { handleWebSocketGeminiLive } from "./ws-gemini-live.js"; +// Multimedia handlers +export { handleImages } from "./images.js"; +export { handleSpeech } from "./speech.js"; +export { handleTranscription } from "./transcription.js"; +export { 
handleVideoCreate, handleVideoStatus } from "./video.js"; +export type { VideoStateMap } from "./video.js"; + // Helpers export { flattenHeaders, @@ -79,6 +86,10 @@ export { buildTextChunks, buildToolCallChunks, isEmbeddingResponse, + isImageResponse, + isAudioResponse, + isTranscriptionResponse, + isVideoResponse, generateDeterministicEmbedding, buildEmbeddingResponse, } from "./helpers.js"; @@ -249,4 +260,9 @@ export type { ChatCompletion, ChatCompletionChoice, ChatCompletionMessage, + ImageItem, + ImageResponse, + AudioResponse, + TranscriptionResponse, + VideoResponse, } from "./types.js"; diff --git a/src/llmock.ts b/src/llmock.ts index 5bb532b..973be71 100644 --- a/src/llmock.ts +++ b/src/llmock.ts @@ -1,4 +1,5 @@ import type { + AudioResponse, ChaosConfig, EmbeddingFixtureOpts, Fixture, @@ -6,9 +7,12 @@ import type { FixtureMatch, FixtureOpts, FixtureResponse, + ImageResponse, MockServerOptions, Mountable, RecordConfig, + TranscriptionResponse, + VideoResponse, } from "./types.js"; import { createServer, type ServerInstance } from "./server.js"; import { @@ -124,6 +128,34 @@ export class LLMock { return this.on({ toolCallId: id }, response, opts); } + onImage(prompt: string | RegExp, response: ImageResponse): this { + return this.addFixture({ + match: { userMessage: prompt, endpoint: "image" }, + response, + }); + } + + onSpeech(input: string | RegExp, response: AudioResponse): this { + return this.addFixture({ + match: { userMessage: input, endpoint: "speech" }, + response, + }); + } + + onTranscription(response: TranscriptionResponse): this { + return this.addFixture({ + match: { endpoint: "transcription" }, + response, + }); + } + + onVideo(prompt: string | RegExp, response: VideoResponse): this { + return this.addFixture({ + match: { userMessage: prompt, endpoint: "video" }, + response, + }); + } + // ---- Service mock convenience methods ---- onSearch(pattern: string | RegExp, results: SearchResult[]): this { @@ -252,6 +284,7 @@ export class LLMock { 
this.moderationFixtures.length = 0; if (this.serverInstance) { this.serverInstance.journal.clear(); + this.serverInstance.videoStates.clear(); } return this; } diff --git a/src/messages.ts b/src/messages.ts index 9b77e85..c58d85a 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -678,6 +678,7 @@ export async function handleMessages( // Convert to ChatCompletionRequest for fixture matching const completionReq = claudeToCompletionRequest(claudeReq); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/ollama.ts b/src/ollama.ts index 1692054..ac0c987 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -388,6 +388,7 @@ export async function handleOllama( // Convert to ChatCompletionRequest for fixture matching const completionReq = ollamaToCompletionRequest(ollamaReq); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( @@ -646,6 +647,7 @@ export async function handleOllamaGenerate( // Convert to ChatCompletionRequest for fixture matching const completionReq = ollamaGenerateToCompletionRequest(generateReq); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/recorder.ts b/src/recorder.ts index d5348a1..1be0156 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -134,7 +134,21 @@ export async function proxyAndRecord( let fixtureResponse: FixtureResponse; - if (collapsed) { + // TTS response — binary audio, not JSON + const isAudioResponse = ctString.toLowerCase().startsWith("audio/"); + if (isAudioResponse && rawBuffer.length > 0) { + // Derive format from Content-Type (audio/mpeg→mp3, audio/opus→opus, etc.) + const audioFormat = ctString + .toLowerCase() + .replace("audio/", "") + .replace("mpeg", "mp3") + .split(";")[0] + .trim(); + fixtureResponse = { + audio: rawBuffer.toString("base64"), + ...(audioFormat && audioFormat !== "mp3" ? 
{ format: audioFormat } : {}), + }; + } else if (collapsed) { // Streaming response — use collapsed result defaults.logger.warn(`Streaming response detected (${ctString}) — collapsing to fixture`); if (collapsed.truncated) { @@ -348,6 +362,69 @@ function buildFixtureResponse( // Corrupted base64 or non-float32 data — fall through to error } } + // OpenAI image generation: { created, data: [{ url, b64_json, revised_prompt }] } + if (first.url || first.b64_json) { + const images = (obj.data as Array>).map((item) => ({ + ...(item.url ? { url: String(item.url) } : {}), + ...(item.b64_json ? { b64Json: String(item.b64_json) } : {}), + ...(item.revised_prompt ? { revisedPrompt: String(item.revised_prompt) } : {}), + })); + if (images.length === 1) { + return { image: images[0] }; + } + return { images }; + } + } + + // Gemini Imagen: { predictions: [...] } + if (Array.isArray(obj.predictions)) { + const images = (obj.predictions as Array>).map((p) => ({ + ...(p.bytesBase64Encoded ? { b64Json: String(p.bytesBase64Encoded) } : {}), + ...(p.mimeType ? { mimeType: String(p.mimeType) } : {}), + })); + if (images.length === 1) { + return { image: images[0] }; + } + return { images }; + } + + // OpenAI transcription: { text: "...", ... } + if ( + typeof obj.text === "string" && + (obj.task === "transcribe" || obj.language !== undefined || obj.duration !== undefined) + ) { + return { + transcription: { + text: obj.text as string, + ...(obj.language ? { language: String(obj.language) } : {}), + ...(obj.duration !== undefined ? { duration: Number(obj.duration) } : {}), + ...(Array.isArray(obj.words) ? { words: obj.words } : {}), + ...(Array.isArray(obj.segments) ? { segments: obj.segments } : {}), + }, + }; + } + + // OpenAI video generation: { id, status, ... 
} + if ( + typeof obj.id === "string" && + typeof obj.status === "string" && + (obj.status === "completed" || obj.status === "in_progress" || obj.status === "failed") + ) { + if (obj.status === "completed" && obj.url) { + return { + video: { + id: String(obj.id), + status: "completed" as const, + url: String(obj.url), + }, + }; + } + return { + video: { + id: String(obj.id), + status: obj.status === "failed" ? ("failed" as const) : ("processing" as const), + }, + }; } // Direct embedding: { embedding: [...] } @@ -491,23 +568,34 @@ function buildFixtureResponse( /** * Derive fixture match criteria from the original request. */ +type EndpointType = "chat" | "image" | "speech" | "transcription" | "video" | "embedding"; + function buildFixtureMatch(request: ChatCompletionRequest): { userMessage?: string; inputText?: string; + endpoint?: EndpointType; } { + const match: { userMessage?: string; inputText?: string; endpoint?: EndpointType } = {}; + + // Include endpoint type for multimedia fixtures + if (request._endpointType && request._endpointType !== "chat") { + match.endpoint = request._endpointType as EndpointType; + } + // Embedding request if (request.embeddingInput) { - return { inputText: request.embeddingInput }; + match.inputText = request.embeddingInput; + return match; } - // Chat request — match on the last user message + // Chat/multimedia request — match on the last user message const lastUser = getLastMessageByRole(request.messages ?? 
[], "user"); if (lastUser) { const text = getTextContent(lastUser.content); if (text) { - return { userMessage: text }; + match.userMessage = text; } } - return {}; + return match; } diff --git a/src/responses.ts b/src/responses.ts index 126e997..7d6946e 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -803,6 +803,7 @@ export async function handleResponses( // Convert to ChatCompletionRequest for fixture matching const completionReq = responsesToCompletionRequest(responsesReq); + completionReq._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( diff --git a/src/router.ts b/src/router.ts index efc79c1..f235d50 100644 --- a/src/router.ts +++ b/src/router.ts @@ -1,4 +1,10 @@ import type { ChatCompletionRequest, ChatMessage, ContentPart, Fixture } from "./types.js"; +import { + isImageResponse, + isAudioResponse, + isTranscriptionResponse, + isVideoResponse, +} from "./helpers.js"; export function getLastMessageByRole(messages: ChatMessage[], role: string): ChatMessage | null { for (let i = messages.length - 1; i >= 0; i--) { @@ -41,6 +47,26 @@ export function matchFixture( if (!match.predicate(req)) continue; } + // endpoint — bidirectional filtering: + // 1. If fixture has endpoint set, only match requests of that type + // 2. 
If request has _endpointType but fixture doesn't, skip fixtures + // whose response type is incompatible (prevents generic chat fixtures + // from matching image/speech/video requests and causing 500s) + const reqEndpoint = effective._endpointType as string | undefined; + if (match.endpoint !== undefined) { + if (match.endpoint !== reqEndpoint) continue; + } else if (reqEndpoint && reqEndpoint !== "chat" && reqEndpoint !== "embedding") { + // Fixture has no endpoint restriction but request is multimedia — + // only match if the response type is compatible + const r = fixture.response; + const compatible = + (reqEndpoint === "image" && isImageResponse(r)) || + (reqEndpoint === "speech" && isAudioResponse(r)) || + (reqEndpoint === "transcription" && isTranscriptionResponse(r)) || + (reqEndpoint === "video" && isVideoResponse(r)); + if (!compatible) continue; + } + // userMessage — match against the last user message content if (match.userMessage !== undefined) { const msg = getLastMessageByRole(effective.messages, "user"); diff --git a/src/server.ts b/src/server.ts index 65bd499..f19f3b5 100644 --- a/src/server.ts +++ b/src/server.ts @@ -33,6 +33,10 @@ import { handleGemini } from "./gemini.js"; import { handleBedrock, handleBedrockStream } from "./bedrock.js"; import { handleConverse, handleConverseStream } from "./bedrock-converse.js"; import { handleEmbeddings } from "./embeddings.js"; +import { handleImages } from "./images.js"; +import { handleSpeech } from "./speech.js"; +import { handleTranscription } from "./transcription.js"; +import { handleVideoCreate, handleVideoStatus, type VideoStateMap } from "./video.js"; import { handleOllama, handleOllamaGenerate } from "./ollama.js"; import { handleCohere } from "./cohere.js"; import { handleSearch, type SearchFixture } from "./search.js"; @@ -52,6 +56,7 @@ export interface ServerInstance { journal: Journal; url: string; defaults: HandlerDefaults; + videoStates: VideoStateMap; } const COMPLETIONS_PATH = 
"/v1/chat/completions"; @@ -65,6 +70,12 @@ const COHERE_CHAT_PATH = "/v2/chat"; const SEARCH_PATH = "/search"; const RERANK_PATH = "/v2/rerank"; const MODERATIONS_PATH = "/v1/moderations"; +const IMAGES_PATH = "/v1/images/generations"; +const SPEECH_PATH = "/v1/audio/speech"; +const TRANSCRIPTIONS_PATH = "/v1/audio/transcriptions"; +const VIDEOS_PATH = "/v1/videos"; +const VIDEOS_STATUS_RE = /^\/v1\/videos\/([^/]+)$/; +const GEMINI_PREDICT_RE = /^\/v1beta\/models\/([^:]+):predict$/; const DEFAULT_CHUNK_SIZE = 20; const GEMINI_PATH_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/; @@ -140,6 +151,7 @@ async function handleControlAPI( pathname: string, fixtures: Fixture[], journal: Journal, + videoStates: VideoStateMap, ): Promise { if (!pathname.startsWith(CONTROL_PREFIX)) return false; @@ -213,6 +225,7 @@ async function handleControlAPI( if (subPath === "/reset" && req.method === "POST") { fixtures.length = 0; journal.clear(); + videoStates.clear(); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify({ reset: true })); return true; @@ -344,6 +357,7 @@ async function handleCompletions( } // Match fixture + body._endpointType = "chat"; const testId = getTestId(req); const fixture = matchFixture( fixtures, @@ -633,6 +647,7 @@ export async function createServer( } const journal = new Journal(); + const videoStates: VideoStateMap = new Map(); // Share journal and metrics registry with mounted services if (mounts) { @@ -703,7 +718,7 @@ export async function createServer( // Control API — must be checked before mounts and path rewrites if (pathname.startsWith(CONTROL_PREFIX)) { - await handleControlAPI(req, res, pathname, fixtures, journal); + await handleControlAPI(req, res, pathname, fixtures, journal, videoStates); return; } @@ -948,6 +963,136 @@ export async function createServer( return; } + // POST /v1/images/generations — OpenAI Image Generation API + if (pathname === IMAGES_PATH && req.method === "POST") { + 
readBody(req) + .then((raw) => handleImages(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /v1/audio/speech — OpenAI TTS API + if (pathname === SPEECH_PATH && req.method === "POST") { + readBody(req) + .then((raw) => handleSpeech(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /v1/audio/transcriptions — OpenAI Transcription API + if (pathname === TRANSCRIPTIONS_PATH && req.method === "POST") { + readBody(req) + .then((raw) => + handleTranscription(req, res, raw, fixtures, journal, defaults, setCorsHeaders), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /v1/videos — Video Generation API + if (pathname === VIDEOS_PATH && req.method === "POST") { + readBody(req) + .then((raw) => + handleVideoCreate( + req, + res, + raw, + fixtures, + journal, + defaults, + setCorsHeaders, + videoStates, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // GET /v1/videos/{id} — Video Status Check + const videoStatusMatch = pathname.match(VIDEOS_STATUS_RE); + if (videoStatusMatch && req.method === "GET") { + const videoId = videoStatusMatch[1]; + handleVideoStatus(req, res, videoId, journal, setCorsHeaders, videoStates); + return; + } + + // POST /v1beta/models/{model}:predict — Gemini Imagen API + const geminiPredictMatch = pathname.match(GEMINI_PREDICT_RE); + if (geminiPredictMatch && req.method === "POST") { + const predictModel = geminiPredictMatch[1]; + readBody(req) + .then((raw) => + handleImages( + req, + res, + raw, + fixtures, + journal, + defaults, + setCorsHeaders, + "gemini", + predictModel, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + // POST /v1beta/models/{model}:(generateContent|streamGenerateContent) — Google Gemini const geminiMatch = pathname.match(GEMINI_PATH_RE); if (geminiMatch && req.method === "POST") { @@ -1466,7 +1611,7 @@ export async function createServer( } } - resolve({ server, journal, url, defaults }); + resolve({ server, journal, url, defaults, videoStates }); }); }); } diff --git a/src/speech.ts b/src/speech.ts new file mode 100644 index 0000000..4245f72 --- /dev/null +++ b/src/speech.ts @@ -0,0 +1,186 @@ +import type * as http from "node:http"; +import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js"; +import { isAudioResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js"; +import { matchFixture } from "./router.js"; 
+import { writeErrorResponse } from "./sse-writer.js"; +import type { Journal } from "./journal.js"; +import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; + +interface SpeechRequest { + model?: string; + input: string; + voice?: string; + response_format?: string; + speed?: number; + [key: string]: unknown; +} + +const FORMAT_TO_CONTENT_TYPE: Record = { + mp3: "audio/mpeg", + opus: "audio/opus", + aac: "audio/aac", + flac: "audio/flac", + wav: "audio/wav", + pcm: "audio/pcm", +}; + +export async function handleSpeech( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: HandlerDefaults, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + setCorsHeaders(res); + const path = req.url ?? "/v1/audio/speech"; + const method = req.method ?? "POST"; + + let speechReq: SpeechRequest; + try { + speechReq = JSON.parse(raw) as SpeechRequest; + } catch { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: null, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" }, + }), + ); + return; + } + + const syntheticReq: ChatCompletionRequest = { + model: speechReq.model ?? 
"tts-1", + messages: [{ role: "user", content: speechReq.input }], + _endpointType: "speech", + }; + + const testId = getTestId(req); + const fixture = matchFixture( + fixtures, + syntheticReq, + journal.getFixtureMatchCountsForTest(testId), + defaults.requestTransform, + ); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures, testId); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { method, path, headers: flattenHeaders(req.headers), body: syntheticReq }, + defaults.registry, + defaults.logger, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + syntheticReq, + "openai", + req.url ?? "/v1/audio/speech", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" }, + }), + ); + return; + } + + const response = fixture.response; + + if (isErrorResponse(response)) { + const status = response.status ?? 
500; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + if (!isAudioResponse(response)) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { message: "Fixture response is not an audio type", type: "server_error" }, + }), + ); + return; + } + + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 200, fixture }, + }); + + const format = response.format ?? "mp3"; + const contentType = FORMAT_TO_CONTENT_TYPE[format] ?? "audio/mpeg"; + const audioBytes = Buffer.from(response.audio, "base64"); + + res.writeHead(200, { "Content-Type": contentType }); + res.end(audioBytes); +} diff --git a/src/transcription.ts b/src/transcription.ts new file mode 100644 index 0000000..affedcc --- /dev/null +++ b/src/transcription.ts @@ -0,0 +1,184 @@ +import type * as http from "node:http"; +import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js"; +import { isTranscriptionResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import type { Journal } from "./journal.js"; +import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; + +/** + * Extract a named field value from a multipart/form-data body. + * Lightweight parser — scans for Content-Disposition headers + * to find simple string field values. 
+ */ +function extractFormField(raw: string, fieldName: string): string | undefined { + const pattern = new RegExp( + `Content-Disposition:\\s*form-data;[^\\r\\n]*name="${fieldName}"[^\\r\\n]*\\r\\n\\r\\n([^\\r\\n]*)`, + "i", + ); + const match = raw.match(pattern); + return match?.[1]; +} + +export async function handleTranscription( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: HandlerDefaults, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + setCorsHeaders(res); + const path = req.url ?? "/v1/audio/transcriptions"; + const method = req.method ?? "POST"; + + const model = extractFormField(raw, "model") ?? "whisper-1"; + const responseFormat = extractFormField(raw, "response_format") ?? "json"; + + const syntheticReq: ChatCompletionRequest = { + model, + messages: [], + _endpointType: "transcription", + }; + + const testId = getTestId(req); + const fixture = matchFixture( + fixtures, + syntheticReq, + journal.getFixtureMatchCountsForTest(testId), + defaults.requestTransform, + ); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures, testId); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { method, path, headers: flattenHeaders(req.headers), body: syntheticReq }, + defaults.registry, + defaults.logger, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + syntheticReq, + "openai", + req.url ?? "/v1/audio/transcriptions", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? 
"Strict mode: no fixture matched" + : "No fixture matched"; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + code: "no_fixture_match", + }, + }), + ); + return; + } + + const response = fixture.response; + + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + if (!isTranscriptionResponse(response)) { + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response is not a transcription type", + type: "server_error", + }, + }), + ); + return; + } + + journal.add({ + method, + path, + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 200, fixture }, + }); + + const t = response.transcription; + const useVerbose = responseFormat === "verbose_json" || t.words != null || t.segments != null; + + if (useVerbose) { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + task: "transcribe", + language: t.language ?? "english", + duration: t.duration ?? 0, + text: t.text, + words: t.words ?? [], + segments: t.segments ?? 
[], + }), + ); + } else { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ text: t.text })); + } +} diff --git a/src/types.ts b/src/types.ts index ea64d8a..4d8a3f4 100644 --- a/src/types.ts +++ b/src/types.ts @@ -50,6 +50,8 @@ export interface ChatCompletionRequest { response_format?: { type: string; [key: string]: unknown }; /** Embedding input text, set by the embeddings handler for fixture matching. */ embeddingInput?: string; + /** Endpoint type, set by handlers for fixture endpoint filtering. */ + _endpointType?: string; [key: string]: unknown; } @@ -70,6 +72,7 @@ export interface FixtureMatch { predicate?: (req: ChatCompletionRequest) => boolean; /** Which occurrence of this match to respond to (0-indexed). Undefined means match any. */ sequenceIndex?: number; + endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding"; } // Fixture response types @@ -111,12 +114,50 @@ export interface EmbeddingResponse { embedding: number[]; } +export interface ImageItem { + url?: string; + b64Json?: string; + revisedPrompt?: string; +} + +export interface ImageResponse { + image?: ImageItem; + images?: ImageItem[]; +} + +export interface AudioResponse { + audio: string; + format?: string; +} + +export interface TranscriptionResponse { + transcription: { + text: string; + language?: string; + duration?: number; + words?: Array<{ word: string; start: number; end: number }>; + segments?: Array<{ id: number; text: string; start: number; end: number }>; + }; +} + +export interface VideoResponse { + video: { + id: string; + status: "processing" | "completed" | "failed"; + url?: string; + }; +} + export type FixtureResponse = | TextResponse | ToolCallResponse | ContentWithToolCallsResponse | ErrorResponse - | EmbeddingResponse; + | EmbeddingResponse + | ImageResponse + | AudioResponse + | TranscriptionResponse + | VideoResponse; // Streaming physics @@ -165,6 +206,7 @@ export interface FixtureFileEntry { model?: 
string; responseFormat?: string; sequenceIndex?: number; + endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding"; // predicate not supported in JSON files }; response: FixtureResponse; diff --git a/src/video.ts b/src/video.ts new file mode 100644 index 0000000..dfc4670 --- /dev/null +++ b/src/video.ts @@ -0,0 +1,238 @@
import type * as http from "node:http";
import type { ChatCompletionRequest, Fixture, HandlerDefaults, VideoResponse } from "./types.js";
import { isVideoResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
import { matchFixture } from "./router.js";
import { writeErrorResponse } from "./sse-writer.js";
import type { Journal } from "./journal.js";
import { applyChaos } from "./chaos.js";
import { proxyAndRecord } from "./recorder.js";

/** JSON body accepted by the video creation endpoint. Extra keys are tolerated. */
interface VideoRequest {
  model?: string;
  prompt: string;
  [key: string]: unknown;
}

/** Stored video state for GET status checks. Key: `${testId}:${videoId}` */
export type VideoStateMap = Map<string, VideoResponse["video"]>;

/**
 * Parses the raw request body into a {@link VideoRequest}.
 *
 * Returns `null` when the body is not valid JSON, or when it is valid JSON that
 * is not an object (e.g. `null`, a number, a string). Without the second check
 * a body of `null` would make the later `videoReq.model` access throw.
 */
function parseVideoRequest(raw: string): VideoRequest | null {
  try {
    const parsed: unknown = JSON.parse(raw);
    return typeof parsed === "object" && parsed !== null ? (parsed as VideoRequest) : null;
  } catch {
    return null;
  }
}

/**
 * Handles a video creation request.
 *
 * Flow:
 * 1. Parse the JSON body; reply 400 (`invalid_json`) if it cannot be used.
 * 2. Wrap the prompt in a synthetic chat request tagged `_endpointType: "video"`
 *    and look for a matching fixture.
 * 3. Let `applyChaos` take over the response if it reports that it did so.
 * 4. With no fixture: try `proxyAndRecord` when `defaults.record` is set,
 *    otherwise reply 404 (or 503 when `defaults.strict` is set).
 * 5. With a fixture: relay an error fixture as-is, reply 500 if the fixture is
 *    not a video response, or store the video under `${testId}:${video.id}` in
 *    `videoStates` and reply 200 with its id/status (plus `url` when completed).
 *
 * Every exit path adds one entry to `journal`, except the chaos path, where
 * the journal is handed to `applyChaos` instead.
 *
 * @param req - Incoming HTTP request; `url`/`method` default to `/v1/videos`/`POST`.
 * @param res - HTTP response to write to.
 * @param raw - Raw request body text.
 * @param fixtures - Fixtures to match against.
 * @param journal - Request journal, also used for per-test match counts.
 * @param defaults - Handler options (chaos, record, strict, requestTransform, …).
 * @param setCorsHeaders - Called first to set CORS headers on `res`.
 * @param videoStates - Shared store read later by {@link handleVideoStatus}.
 */
export async function handleVideoCreate(
  req: http.IncomingMessage,
  res: http.ServerResponse,
  raw: string,
  fixtures: Fixture[],
  journal: Journal,
  defaults: HandlerDefaults,
  setCorsHeaders: (res: http.ServerResponse) => void,
  videoStates: VideoStateMap,
): Promise<void> {
  setCorsHeaders(res);
  const path = req.url ?? "/v1/videos";
  const method = req.method ?? "POST";

  // Single place that builds journal entries so all exit paths log the same shape.
  const record = (
    status: number,
    matched: Fixture | null,
    body: ChatCompletionRequest | null,
  ): void => {
    journal.add({
      method,
      path,
      headers: flattenHeaders(req.headers),
      body,
      response: { status, fixture: matched },
    });
  };

  const videoReq = parseVideoRequest(raw);
  if (videoReq === null) {
    record(400, null, null);
    writeErrorResponse(
      res,
      400,
      JSON.stringify({
        error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
      }),
    );
    return;
  }

  // Fixture matching works on chat-shaped requests, so the prompt is wrapped in one.
  const syntheticReq: ChatCompletionRequest = {
    model: videoReq.model ?? "sora-2",
    messages: [{ role: "user", content: videoReq.prompt }],
    _endpointType: "video",
  };

  const testId = getTestId(req);
  const fixture = matchFixture(
    fixtures,
    syntheticReq,
    journal.getFixtureMatchCountsForTest(testId),
    defaults.requestTransform,
  );

  if (fixture) {
    journal.incrementFixtureMatchCount(fixture, fixtures, testId);
  }

  const chaosHandled = applyChaos(
    res,
    fixture,
    defaults.chaos,
    req.headers,
    journal,
    { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
    defaults.registry,
    defaults.logger,
  );
  if (chaosHandled) return;

  if (!fixture) {
    if (defaults.record) {
      const proxied = await proxyAndRecord(
        req,
        res,
        syntheticReq,
        "openai",
        path,
        fixtures,
        defaults,
        raw,
      );
      if (proxied) {
        record(res.statusCode ?? 200, null, syntheticReq);
        return;
      }
    }

    const strictStatus = defaults.strict ? 503 : 404;
    const strictMessage = defaults.strict
      ? "Strict mode: no fixture matched"
      : "No fixture matched";
    record(strictStatus, null, syntheticReq);
    writeErrorResponse(
      res,
      strictStatus,
      JSON.stringify({
        error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
      }),
    );
    return;
  }

  const response = fixture.response;

  if (isErrorResponse(response)) {
    const status = response.status ?? 500;
    record(status, fixture, syntheticReq);
    writeErrorResponse(res, status, JSON.stringify(response));
    return;
  }

  if (!isVideoResponse(response)) {
    record(500, fixture, syntheticReq);
    writeErrorResponse(
      res,
      500,
      JSON.stringify({
        error: { message: "Fixture response is not a video type", type: "server_error" },
      }),
    );
    return;
  }

  record(200, fixture, syntheticReq);

  const video = response.video;
  // NOTE(review): created_at is "now" at response time and is not stored, so the
  // status endpoint reports a different value on each poll — confirm this is intended.
  const created_at = Math.floor(Date.now() / 1000);

  // Store for GET status checks
  videoStates.set(`${testId}:${video.id}`, video);

  // The URL is only exposed once the video is reported as completed.
  const payload =
    video.status === "completed"
      ? { id: video.id, status: video.status, url: video.url, created_at }
      : { id: video.id, status: video.status, created_at };

  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify(payload));
}

/**
 * Handles a video status request for a video previously stored by
 * {@link handleVideoCreate}.
 *
 * Looks up `${testId}:${videoId}` in `videoStates`. Replies 404 (`not_found`)
 * when there is no entry; otherwise replies 200 with `id`, `status`,
 * `created_at` and, when the stored video has a truthy `url`, `url`.
 * One journal entry is added in either case.
 *
 * @param req - Incoming HTTP request; `url`/`method` default to
 *   `/v1/videos/${videoId}`/`GET`.
 * @param res - HTTP response to write to.
 * @param videoId - Identifier of the video being polled.
 * @param journal - Request journal.
 * @param setCorsHeaders - Called first to set CORS headers on `res`.
 * @param videoStates - Shared store populated by {@link handleVideoCreate}.
 */
export function handleVideoStatus(
  req: http.IncomingMessage,
  res: http.ServerResponse,
  videoId: string,
  journal: Journal,
  setCorsHeaders: (res: http.ServerResponse) => void,
  videoStates: VideoStateMap,
): void {
  setCorsHeaders(res);
  const path = req.url ?? `/v1/videos/${videoId}`;
  const method = req.method ?? "GET";

  const record = (status: number): void => {
    journal.add({
      method,
      path,
      headers: flattenHeaders(req.headers),
      body: null,
      response: { status, fixture: null },
    });
  };

  const testId = getTestId(req);
  const video = videoStates.get(`${testId}:${videoId}`);

  if (!video) {
    record(404);
    writeErrorResponse(
      res,
      404,
      JSON.stringify({ error: { message: `Video ${videoId} not found`, type: "not_found" } }),
    );
    return;
  }

  record(200);

  // NOTE(review): recomputed on every poll, so it will not match the value
  // returned by the create call — confirm this is intended.
  const created_at = Math.floor(Date.now() / 1000);
  const body: Record<string, unknown> = {
    id: video.id,
    status: video.status,
    created_at,
  };
  if (video.url) body.url = video.url;

  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify(body));
}