diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65aa5b7..7397b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# @copilotkit/aimock
+## 1.12.0
+
+### Minor Changes
+
+- Multimedia endpoint support: image generation (OpenAI DALL-E + Gemini Imagen), text-to-speech, audio transcription, and video generation with async polling (#101)
+- `match.endpoint` field for fixture isolation — prevents cross-matching between chat, image, speech, transcription, video, and embedding fixtures (#101)
+- Bidirectional endpoint filtering — generic fixtures only match compatible endpoint types (#101)
+- Convenience methods: `onImage`, `onSpeech`, `onTranscription`, `onVideo` (#101)
+- Record & replay for all multimedia endpoints — proxy to real APIs, save fixtures with correct format/type detection (#101)
+- `_endpointType` explicit field on `ChatCompletionRequest` for type safety (#101)
+- Comparison matrix and drift detection rules updated for multimedia (#101)
+- 55 new tests (32 integration, 11 record/replay, 12 type/routing) (#101)
+
## 1.11.0
### Minor Changes
diff --git a/README.md b/README.md
index 3a759b5..0e6ed07 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
https://github.com/user-attachments/assets/646bf106-0320-41f2-a9b1-5090454830f3
-Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.
+Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.
## Quick Start
@@ -43,6 +43,7 @@ Run them all on one port with `npx aimock --config aimock.json`, or use the prog
- **[Record & Replay](https://aimock.copilotkit.dev/record-replay)** — Proxy real APIs, save as fixtures, replay deterministically forever
- **[11 LLM Providers](https://aimock.copilotkit.dev/docs)** — OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere — full streaming support
+- **[Multimedia APIs](https://aimock.copilotkit.dev/images)** — Image generation (DALL-E, Imagen), text-to-speech, audio transcription, video generation
- **[MCP / A2A / AG-UI / Vector](https://aimock.copilotkit.dev/mcp-mock)** — Mock every protocol your AI agents use
- **[Chaos Testing](https://aimock.copilotkit.dev/chaos-testing)** — 500 errors, malformed JSON, mid-stream disconnects at any probability
- **[Drift Detection](https://aimock.copilotkit.dev/drift-detection)** — Daily CI validation against real APIs
diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml
index 9fa1f59..1d2e733 100644
--- a/charts/aimock/Chart.yaml
+++ b/charts/aimock/Chart.yaml
@@ -3,4 +3,4 @@ name: aimock
description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector)
type: application
version: 0.1.0
-appVersion: "1.11.0"
+appVersion: "1.12.0"
diff --git a/docs/fixtures/index.html b/docs/fixtures/index.html
index 208611c..b023802 100644
--- a/docs/fixtures/index.html
+++ b/docs/fixtures/index.html
@@ -162,6 +162,26 @@
Response Types
embedding[]
Vector of numbers
+
+ Image
+ image.url or images[].url
+ Generated image URL(s) or base64 data
+
+
+ Speech
+ audio
+ Base64-encoded audio data
+
+
+ Transcription
+ transcription.text, words?, segments?
+ Transcribed text with optional timestamps
+
+
+ Video
+ video.url, video.duration?
+ Generated video URL with async polling
+
@@ -239,6 +259,10 @@ Programmatically
mock .onMessage ("hello" , { content : "Hi!" });
mock .onToolCall ("get_weather" , { content : "72F" });
mock .onEmbedding ("my text" , { embedding : [0.1 , 0.2 ] });
+mock .onImage ("sunset" , { image : { url : "https://example.com/sunset.png" } });
+mock .onSpeech ("hello" , { audio : "SGVsbG8=" });
+mock .onTranscription ("audio.mp3" , { transcription : { text : "Hello" } });
+mock .onVideo ("cats" , { video : { url : "https://example.com/cats.mp4" } });
mock .onJsonOutput ("data" , { key : "value" });
mock .onToolResult ("call_123" , { content : "Done" });
diff --git a/docs/images/index.html b/docs/images/index.html
new file mode 100644
index 0000000..8aaf28e
--- /dev/null
+++ b/docs/images/index.html
@@ -0,0 +1,286 @@
+
+
+
+
+
+ Image Generation — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Image Generation
+
+ The image generation endpoints support both OpenAI
+ POST /v1/images/generations and Gemini Imagen
+ POST /v1beta/models/{model}:predict formats. Return single or multiple images
+ as URLs or base64-encoded data.
+
+
+ Endpoints
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/images/generations
+ JSON (OpenAI)
+
+
+ POST
+ /v1beta/models/{model}:predict
+ JSON (Gemini Imagen)
+
+
+
+
+ Unit Test: Single Image URL
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns a single image URL" , async () => {
+ mock .onImage ("a sunset over mountains" , {
+ image : { url : "https://example.com/sunset.png" },
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "a sunset over mountains" ,
+ n : 1 ,
+ size : "1024x1024" ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data [0 ].url ).toBe ("https://example.com/sunset.png" );
+});
+
+
+ Unit Test: Multiple Images
+
+
+
+
it ("returns multiple images" , async () => {
+ mock .onImage ("cats" , {
+ images : [
+ { url : "https://example.com/cat1.png" },
+ { url : "https://example.com/cat2.png" },
+ ],
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "cats playing" ,
+ n : 2 ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data ).toHaveLength (2 );
+ expect (body .data [0 ].url ).toBe ("https://example.com/cat1.png" );
+ expect (body .data [1 ].url ).toBe ("https://example.com/cat2.png" );
+});
+
+
+ Unit Test: Base64 Response
+
+
+
+
it ("returns base64-encoded image" , async () => {
+ mock .onImage ("logo" , {
+ image : { b64_json : "iVBORw0KGgoAAAANSUhEUg..." },
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "a company logo" ,
+ response_format : "b64_json" ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data [0 ].b64_json ).toBeDefined ();
+});
+
+
+ Unit Test: Gemini Imagen Format
+
+
+
+
it ("handles Gemini Imagen predict endpoint" , async () => {
+ mock .onImage ("landscape" , {
+ image : { url : "https://example.com/landscape.png" },
+ });
+
+ const res = await fetch (
+ `${mock.url}/v1beta/models/imagen-3.0-generate-002:predict` ,
+ {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ instances : [{ prompt : "a beautiful landscape" }],
+ parameters : { sampleCount : 1 },
+ }),
+ }
+ );
+
+ const body = await res .json ();
+ expect (body .predictions ).toBeDefined ();
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "sunset" },
+ "response" : {
+ "image" : { "url" : "https://example.com/sunset.png" }
+ }
+ },
+ {
+ "match" : { "userMessage" : "cats" },
+ "response" : {
+ "images" : [
+ { "url" : "https://example.com/cat1.png" },
+ { "url" : "https://example.com/cat2.png" }
+ ]
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Matches the OpenAI /v1/images/generations response format:
+
+ created — Unix timestamp
+
+ data[].url — URL of the generated image (when using URL format)
+
+
+ data[].b64_json — base64-encoded image data (when using b64_json
+ format)
+
+
+ data[].revised_prompt — the prompt as revised by the model (optional)
+
+
+
+
+
+ Image fixtures use match.userMessage which maps to the
+ prompt field in the request body. The prompt matcher checks
+ for substring matches.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Recorded image fixtures capture the url or b64_json from the
+ provider response and save them to disk, so subsequent runs replay instantly without
+ hitting the real API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/index.html b/docs/index.html
index 75d3c5f..406061d 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1405,8 +1405,16 @@ Chaos Testing
-
+
🎨
+
Multimedia APIs
+
+ Image generation, text-to-speech, audio transcription, and video generation —
+ mock every multimedia endpoint with fixtures.
+
+
+
+
📊
Drift Detection
Fixtures stay accurate as providers evolve. Fixes ship before your tests break.
@@ -1539,6 +1547,38 @@
How aimock compares
✗
✓
+
+ Image generation
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Text-to-Speech
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Audio transcription
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Video generation
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
Structured output / JSON mode
Built-in ✓
diff --git a/docs/sidebar.js b/docs/sidebar.js
index 5025839..3159755 100644
--- a/docs/sidebar.js
+++ b/docs/sidebar.js
@@ -26,6 +26,15 @@
{ label: "Compatible Providers", href: "/compatible-providers" },
],
},
+ {
+ title: "Multimedia",
+ links: [
+ { label: "Image Generation", href: "/images" },
+ { label: "Text-to-Speech", href: "/speech" },
+ { label: "Audio Transcription", href: "/transcription" },
+ { label: "Video Generation", href: "/video" },
+ ],
+ },
{
title: "LLM Features",
links: [
diff --git a/docs/speech/index.html b/docs/speech/index.html
new file mode 100644
index 0000000..10d389c
--- /dev/null
+++ b/docs/speech/index.html
@@ -0,0 +1,225 @@
+
+
+
+
+
+ Text-to-Speech — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Text-to-Speech
+
+ The POST /v1/audio/speech endpoint returns audio data from text input.
+ Supports multiple output formats including mp3, opus, aac, flac, wav, and pcm.
+
+
+ Endpoint
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/audio/speech
+ JSON request, binary/base64 response
+
+
+
+
+ Unit Test: Basic Speech
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns audio for text input" , async () => {
+ mock .onSpeech ("Hello world" , { audio : "SGVsbG8gd29ybGQ=" });
+
+ const res = await fetch (`${mock.url}/v1/audio/speech` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "tts-1" ,
+ input : "Hello world" ,
+ voice : "alloy" ,
+ }),
+ });
+
+ expect (res .ok ).toBe (true );
+ const body = await res .json ();
+ expect (body .audio ).toBe ("SGVsbG8gd29ybGQ=" );
+});
+
+
+ Format Options
+
+ The response_format field in the request controls the audio output format.
+ Supported values:
+
+
+
+
+ Format
+ Content-Type
+ Description
+
+
+
+
+ mp3
+ audio/mpeg
+ Default format, widely supported
+
+
+ opus
+ audio/opus
+ Low latency, good for streaming
+
+
+ aac
+ audio/aac
+ Preferred for mobile devices
+
+
+ flac
+ audio/flac
+ Lossless compression
+
+
+ wav
+ audio/wav
+ Uncompressed, no decoding overhead
+
+
+ pcm
+ audio/pcm
+ Raw samples, 24kHz 16-bit signed little-endian
+
+
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "Hello world" },
+ "response" : {
+ "audio" : "SGVsbG8gd29ybGQ="
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Returns audio data matching the requested format:
+
+ audio — base64-encoded audio data in the fixture response
+
+
+
+
+ Speech fixtures use match.userMessage which maps to the
+ input field in the request body. The matcher checks for substring matches
+ on the text to be spoken.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Binary audio from the provider is base64-encoded in the recorded fixture, with the format
+ derived from the response Content-Type header (e.g.
+ audio/mpeg for mp3). Subsequent requests replay the cached audio without
+ hitting the real API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/transcription/index.html b/docs/transcription/index.html
new file mode 100644
index 0000000..0eb0653
--- /dev/null
+++ b/docs/transcription/index.html
@@ -0,0 +1,242 @@
+
+
+
+
+
+ Audio Transcription — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Audio Transcription
+
+ The POST /v1/audio/transcriptions endpoint accepts multipart form-data audio
+ uploads and returns transcribed text. Supports both simple and verbose response formats
+ with word-level timestamps and segments.
+
+
+ Endpoint
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/audio/transcriptions
+ Multipart form-data request, JSON response
+
+
+
+
+ Unit Test: Simple Transcription
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns simple transcription text" , async () => {
+ mock .onTranscription ("audio.mp3" , {
+ transcription : { text : "Hello, how are you today?" },
+ });
+
+ const form = new FormData ();
+ form .append ("file" , new Blob (["fake-audio" ]), "audio.mp3" );
+ form .append ("model" , "whisper-1" );
+
+ const res = await fetch (`${mock.url}/v1/audio/transcriptions` , {
+ method : "POST" ,
+ body : form ,
+ });
+
+ const body = await res .json ();
+ expect (body .text ).toBe ("Hello, how are you today?" );
+});
+
+
+ Unit Test: Verbose Response with Segments
+
+
+
+
it ("returns verbose transcription with words and segments" , async () => {
+ mock .onTranscription ("meeting.wav" , {
+ transcription : {
+ text : "Welcome to the meeting." ,
+ words : [
+ { word : "Welcome" , start : 0.0 , end : 0.5 },
+ { word : "to" , start : 0.5 , end : 0.7 },
+ { word : "the" , start : 0.7 , end : 0.9 },
+ { word : "meeting" , start : 0.9 , end : 1.4 },
+ ],
+ segments : [
+ { id : 0 , text : "Welcome to the meeting." , start : 0.0 , end : 1.4 },
+ ],
+ },
+ });
+
+ const form = new FormData ();
+ form .append ("file" , new Blob (["fake-audio" ]), "meeting.wav" );
+ form .append ("model" , "whisper-1" );
+ form .append ("response_format" , "verbose_json" );
+ form .append ("timestamp_granularities[]" , "word" );
+ form .append ("timestamp_granularities[]" , "segment" );
+
+ const res = await fetch (`${mock.url}/v1/audio/transcriptions` , {
+ method : "POST" ,
+ body : form ,
+ });
+
+ const body = await res .json ();
+ expect (body .text ).toBe ("Welcome to the meeting." );
+ expect (body .words ).toHaveLength (4 );
+ expect (body .segments ).toHaveLength (1 );
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "audio.mp3" },
+ "response" : {
+ "transcription" : {
+ "text" : "Hello, how are you today?"
+ }
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Matches the OpenAI /v1/audio/transcriptions response format:
+
+ Simple (default)
+
+ text — the transcribed text
+
+
+ Verbose (response_format: "verbose_json")
+
+ text — the full transcribed text
+ task — "transcribe"
+ language — detected language code
+ duration — audio duration in seconds
+
+ words[] — word-level timestamps with word,
+ start, end
+
+
+ segments[] — segment-level data with id,
+ text, start, end
+
+
+
+
+
+ Transcription requests use multipart form-data. The fixture
+ match.userMessage maps to the uploaded filename. This allows matching
+ different fixtures based on which audio file is submitted.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Recorded transcription fixtures preserve the full response including text,
+ language, duration, words, and
+ segments, so verbose-mode responses replay with complete word-level
+ timestamps intact.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/video/index.html b/docs/video/index.html
new file mode 100644
index 0000000..c6f9fb6
--- /dev/null
+++ b/docs/video/index.html
@@ -0,0 +1,221 @@
+
+
+
+
+
+ Video Generation — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Video Generation
+
+ The video generation endpoints support async creation via
+ POST /v1/videos and status polling via GET /v1/videos/{id}. Mock
+ the full async polling lifecycle with deterministic responses.
+
+
+ Endpoints
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/videos
+ JSON (create video job)
+
+
+ GET
+ /v1/videos/{id}
+ JSON (poll status)
+
+
+
+
+ Async Polling Pattern
+
+ Video generation is asynchronous. The POST endpoint returns a job ID, and the
+ GET endpoint returns the current status. aimock simulates this by returning
+ "processing" on the first poll and "completed" with the video
+ URL on subsequent polls.
+
+
+ Unit Test: Create and Poll
+
+ Using the programmatic API with vitest, register a fixture and test the full async flow.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("creates a video job and polls for completion" , async () => {
+ mock .onVideo ("a cat playing piano" , {
+ video : { url : "https://example.com/cat-piano.mp4" , duration : 10 },
+ });
+
+ // Step 1: Create the video job
+ const createRes = await fetch (`${mock.url}/v1/videos` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "sora" ,
+ prompt : "a cat playing piano" ,
+ duration : 10 ,
+ }),
+ });
+
+ const createBody = await createRes .json ();
+ expect (createBody .id ).toBeDefined ();
+ expect (createBody .status ).toBe ("processing" );
+
+ // Step 2: Poll for completion
+ const pollRes = await fetch (`${mock.url}/v1/videos/${createBody.id}` );
+ const pollBody = await pollRes .json ();
+
+ expect (pollBody .status ).toBe ("completed" );
+ expect (pollBody .video .url ).toBe ("https://example.com/cat-piano.mp4" );
+ expect (pollBody .video .duration ).toBe (10 );
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "cat playing piano" },
+ "response" : {
+ "video" : {
+ "url" : "https://example.com/cat-piano.mp4" ,
+ "duration" : 10
+ }
+ }
+ }
+ ]
+}
+
+
+ Response Format
+
+ Create (POST /v1/videos)
+
+ id — unique job identifier
+ status — "processing" initially
+ created — Unix timestamp
+
+
+ Poll (GET /v1/videos/{id})
+
+ id — the job identifier
+
+ status — "processing" or
+ "completed"
+
+ video.url — URL of the generated video (when completed)
+ video.duration — video duration in seconds
+
+
+
+
+ Video fixtures use match.userMessage which maps to the
+ prompt field in the creation request. The async polling pattern is handled
+ automatically by aimock.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Completed videos are recorded with their final URL; in-progress responses are also saved
+ so that the async polling lifecycle can be simulated on replay without hitting the real
+ API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/package.json b/package.json
index 76331bb..6ff5c19 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@copilotkit/aimock",
- "version": "1.11.0",
+ "version": "1.12.0",
"description": "Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, and more. Zero dependencies.",
"license": "MIT",
"repository": {
diff --git a/scripts/update-competitive-matrix.ts b/scripts/update-competitive-matrix.ts
index 2c20fb4..43e852e 100644
--- a/scripts/update-competitive-matrix.ts
+++ b/scripts/update-competitive-matrix.ts
@@ -72,6 +72,22 @@ const FEATURE_RULES: FeatureRule[] = [
rowLabel: "Embeddings API",
keywords: ["embedding", "/v1/embeddings", "embed"],
},
+ {
+ rowLabel: "Image generation",
+ keywords: ["image", "dall-e", "dalle", "/v1/images", "image generation", "imagen"],
+ },
+ {
+ rowLabel: "Text-to-Speech",
+ keywords: ["tts", "text-to-speech", "speech", "/v1/audio/speech", "audio generation"],
+ },
+ {
+ rowLabel: "Audio transcription",
+ keywords: ["transcription", "whisper", "/v1/audio/transcriptions", "speech-to-text", "stt"],
+ },
+ {
+ rowLabel: "Video generation",
+ keywords: ["video", "sora", "/v1/videos", "video generation"],
+ },
{
rowLabel: "Structured output / JSON mode",
keywords: ["json_object", "json_schema", "structured output", "response_format"],
diff --git a/src/__tests__/multimedia-record.test.ts b/src/__tests__/multimedia-record.test.ts
new file mode 100644
index 0000000..9f28970
--- /dev/null
+++ b/src/__tests__/multimedia-record.test.ts
@@ -0,0 +1,508 @@
+import { describe, it, expect } from "vitest";
+
+/**
+ * Unit tests for multimedia record/replay support in the recorder module.
+ *
+ * These test the internal detection logic by calling buildFixtureResponse
+ * and buildFixtureMatch indirectly through proxyAndRecord integration,
+ * as well as directly importing where possible.
+ *
+ * Since buildFixtureResponse and buildFixtureMatch are not exported,
+ * we test them via a lightweight upstream mock that returns the expected
+ * shapes, verifying the recorder produces correct fixture responses.
+ */
+
+import * as http from "node:http";
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { proxyAndRecord } from "../recorder.js";
+import type { Fixture, RecordConfig, ChatCompletionRequest } from "../types.js";
+import { Logger } from "../logger.js";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function createUpstream(
+ handler: (req: http.IncomingMessage, res: http.ServerResponse) => void,
+): Promise<{ server: http.Server; url: string }> {
+ return new Promise((resolve) => {
+ const server = http.createServer(handler);
+ server.listen(0, "127.0.0.1", () => {
+ const addr = server.address() as { port: number };
+ resolve({ server, url: `http://127.0.0.1:${addr.port}` });
+ });
+ });
+}
+
+function closeServer(server: http.Server): Promise<void> {
+ return new Promise((resolve) => server.close(() => resolve()));
+}
+
+function createMockReqRes(
+ urlPath: string,
+  headers: Record<string, string> = {},
+): { req: http.IncomingMessage; res: http.ServerResponse; getResponse: () => Promise<string> } {
+ const chunks: Buffer[] = [];
+ let statusCode = 200;
+
+ const req = {
+ method: "POST",
+ url: urlPath,
+ headers: { "content-type": "application/json", ...headers },
+ } as unknown as http.IncomingMessage;
+
+ const res = {
+ statusCode,
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    writeHead(status: number, hdrs?: Record<string, string>) {
+ statusCode = status;
+ res.statusCode = status;
+ },
+ end(data?: string | Buffer) {
+ if (data) chunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
+ },
+ setHeader() {},
+ } as unknown as http.ServerResponse;
+
+ return {
+ req,
+ res,
+ getResponse: async () => Buffer.concat(chunks).toString(),
+ };
+}
+
+function makeTmpDir(): string {
+ return fs.mkdtempSync(path.join(os.tmpdir(), "aimock-mm-record-"));
+}
+
+// ---------------------------------------------------------------------------
+// Tests: buildFixtureResponse detection via proxyAndRecord
+// ---------------------------------------------------------------------------
+
+describe("multimedia record: image response detection", () => {
+ it("detects OpenAI image generation response and saves image fixture", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ created: 1234567890,
+ data: [{ url: "https://example.com/img.png", revised_prompt: "a pretty sunset" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "sunset" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ request,
+ "openai",
+ "/v1/images/generations",
+ fixtures,
+ { record, logger },
+ );
+
+ expect(proxied).toBe(true);
+ expect(fixtures).toHaveLength(1);
+ const fixture = fixtures[0];
+ expect(fixture.match.endpoint).toBe("image");
+ expect(fixture.match.userMessage).toBe("sunset");
+
+ const response = fixture.response as { image?: { url?: string; revisedPrompt?: string } };
+ expect(response.image).toBeDefined();
+ expect(response.image!.url).toBe("https://example.com/img.png");
+ expect(response.image!.revisedPrompt).toBe("a pretty sunset");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects multi-image response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ created: 1234567890,
+ data: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "cats" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as { images?: Array<{ url?: string }> };
+ expect(response.images).toHaveLength(2);
+ expect(response.images![0].url).toBe("https://example.com/1.png");
+ expect(response.images![1].url).toBe("https://example.com/2.png");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects Gemini Imagen response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ predictions: [{ bytesBase64Encoded: "iVBORw0KGgo=", mimeType: "image/png" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "imagen",
+ messages: [{ role: "user", content: "dog" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1beta/models/imagen:predict");
+ await proxyAndRecord(req, res, request, "openai", "/v1beta/models/imagen:predict", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as { image?: { b64Json?: string } };
+ expect(response.image).toBeDefined();
+ expect(response.image!.b64Json).toBe("iVBORw0KGgo=");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: transcription response detection", () => {
+ it("detects OpenAI transcription response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: "english",
+ duration: 5.2,
+ text: "Hello world",
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "whisper-1",
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/transcriptions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as {
+ transcription?: { text: string; language?: string; duration?: number };
+ };
+ expect(response.transcription).toBeDefined();
+ expect(response.transcription!.text).toBe("Hello world");
+ expect(response.transcription!.language).toBe("english");
+ expect(response.transcription!.duration).toBe(5.2);
+ expect(fixtures[0].match.endpoint).toBe("transcription");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects transcription with words and segments", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: "english",
+ duration: 2.0,
+ text: "Hi",
+ words: [{ word: "Hi", start: 0, end: 0.5 }],
+ segments: [{ id: 0, text: "Hi", start: 0, end: 2.0 }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "whisper-1",
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/transcriptions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as {
+ transcription?: { text: string; words?: unknown[]; segments?: unknown[] };
+ };
+ expect(response.transcription!.words).toHaveLength(1);
+ expect(response.transcription!.segments).toHaveLength(1);
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: video response detection", () => {
+ it("detects completed video response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ id: "vid_abc",
+ status: "completed",
+ url: "https://example.com/video.mp4",
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "sora-2",
+ messages: [{ role: "user", content: "dancing cat" }],
+ _endpointType: "video",
+ };
+
+ const { req, res } = createMockReqRes("/v1/videos");
+ await proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as {
+ video?: { id: string; status: string; url?: string };
+ };
+ expect(response.video).toBeDefined();
+ expect(response.video!.id).toBe("vid_abc");
+ expect(response.video!.status).toBe("completed");
+ expect(response.video!.url).toBe("https://example.com/video.mp4");
+ expect(fixtures[0].match.endpoint).toBe("video");
+ expect(fixtures[0].match.userMessage).toBe("dancing cat");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects in-progress video response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: "vid_456", status: "in_progress" }));
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "sora-2",
+ messages: [{ role: "user", content: "slow motion" }],
+ _endpointType: "video",
+ };
+
+ const { req, res } = createMockReqRes("/v1/videos");
+ await proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger });
+
+ const response = fixtures[0].response as {
+ video?: { id: string; status: string };
+ };
+ expect(response.video!.id).toBe("vid_456");
+ expect(response.video!.status).toBe("processing");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: TTS audio response detection", () => {
+ it("detects binary audio response and saves as base64", async () => {
+ const fixturePath = makeTmpDir();
+ const audioBytes = Buffer.from("fake-audio-content");
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "audio/mpeg" });
+ res.end(audioBytes);
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "tts-1",
+ messages: [{ role: "user", content: "hello world" }],
+ _endpointType: "speech",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/speech");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/speech", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as { audio?: string };
+ expect(response.audio).toBe(audioBytes.toString("base64"));
+ expect(fixtures[0].match.endpoint).toBe("speech");
+ expect(fixtures[0].match.userMessage).toBe("hello world");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: buildFixtureMatch endpoint inclusion", () => {
+ it("includes endpoint for image requests", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ created: 1, data: [{ url: "x.png" }] }));
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "test" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBe("image");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("does not include endpoint for chat requests", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ _endpointType: "chat",
+ };
+
+ const { req, res } = createMockReqRes("/v1/chat/completions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBeUndefined();
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("does not include endpoint when _endpointType is absent", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ };
+
+ const { req, res } = createMockReqRes("/v1/chat/completions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBeUndefined();
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
diff --git a/src/__tests__/multimedia-types.test.ts b/src/__tests__/multimedia-types.test.ts
new file mode 100644
index 0000000..1217ba2
--- /dev/null
+++ b/src/__tests__/multimedia-types.test.ts
@@ -0,0 +1,130 @@
+import { describe, test, expect } from "vitest";
+import {
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
+} from "../helpers.js";
+import { matchFixture } from "../router.js";
+import type { Fixture, ChatCompletionRequest, FixtureResponse } from "../types.js";
+
+describe("multimedia type guards", () => {
+ test("isImageResponse detects single image", () => {
+ const r: FixtureResponse = { image: { url: "https://example.com/img.png" } };
+ expect(isImageResponse(r)).toBe(true);
+ });
+
+ test("isImageResponse detects multiple images", () => {
+ const r: FixtureResponse = {
+ images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ };
+ expect(isImageResponse(r)).toBe(true);
+ });
+
+ test("isImageResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isImageResponse(r)).toBe(false);
+ });
+
+ test("isAudioResponse detects audio", () => {
+ const r: FixtureResponse = { audio: "AAAA", format: "mp3" };
+ expect(isAudioResponse(r)).toBe(true);
+ });
+
+ test("isAudioResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isAudioResponse(r)).toBe(false);
+ });
+
+ test("isTranscriptionResponse detects transcription", () => {
+ const r: FixtureResponse = { transcription: { text: "hello" } };
+ expect(isTranscriptionResponse(r)).toBe(true);
+ });
+
+ test("isTranscriptionResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isTranscriptionResponse(r)).toBe(false);
+ });
+
+ test("isVideoResponse detects video", () => {
+ const r: FixtureResponse = {
+ video: { id: "v1", status: "completed", url: "https://example.com/v.mp4" },
+ };
+ expect(isVideoResponse(r)).toBe(true);
+ });
+
+ test("isVideoResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isVideoResponse(r)).toBe(false);
+ });
+});
+
+describe("endpoint filtering in matchFixture", () => {
+ test("fixture with endpoint: image only matches image requests", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "guitar", endpoint: "image" },
+ response: { image: { url: "img.png" } },
+ },
+ ];
+ const chatReq: ChatCompletionRequest = {
+ model: "gpt-4",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "chat",
+ };
+ expect(matchFixture(fixtures, chatReq)).toBeNull();
+
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "image",
+ };
+ expect(matchFixture(fixtures, imageReq)).toBe(fixtures[0]);
+ });
+
+ test("fixture without endpoint matches chat/embedding requests but not multimedia", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "guitar" },
+ response: { content: "Chat about guitars" },
+ },
+ ];
+ // Chat requests match generic fixtures
+ const chatReq: ChatCompletionRequest = {
+ model: "gpt-4",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "chat",
+ };
+ expect(matchFixture(fixtures, chatReq)).toBe(fixtures[0]);
+
+ // Image requests do NOT match generic chat fixtures (prevents 500s)
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "image",
+ };
+ expect(matchFixture(fixtures, imageReq)).toBeNull();
+ });
+
+ test("endpoint filtering works with sequenceIndex", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 0 },
+ response: { image: { url: "1.png" } },
+ },
+ {
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 },
+ response: { image: { url: "2.png" } },
+ },
+ ];
+ const counts = new Map();
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "g" }],
+ _endpointType: "image",
+ };
+
+ const first = matchFixture(fixtures, imageReq, counts);
+ expect(first).toBe(fixtures[0]);
+ });
+});
diff --git a/src/__tests__/multimedia.test.ts b/src/__tests__/multimedia.test.ts
new file mode 100644
index 0000000..68a1265
--- /dev/null
+++ b/src/__tests__/multimedia.test.ts
@@ -0,0 +1,447 @@
+import { describe, test, expect } from "vitest";
+import { LLMock } from "../llmock.js";
+
+describe("image generation", () => {
+ test("image generation returns fixture (OpenAI format)", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "image" },
+ response: {
+ image: { url: "https://example.com/guitar.png", revisedPrompt: "a guitar on display" },
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "a guitar", n: 1 }),
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.data[0].url).toBe("https://example.com/guitar.png");
+ expect(data.data[0].revised_prompt).toBe("a guitar on display");
+ expect(typeof data.created).toBe("number");
+ await mock.stop();
+ });
+
+ test("multiple images", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "guitars", endpoint: "image" },
+ response: {
+ images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "guitars", n: 2 }),
+ });
+ const data = await res.json();
+ expect(data.data).toHaveLength(2);
+ expect(data.data[0].url).toBe("https://example.com/1.png");
+ expect(data.data[1].url).toBe("https://example.com/2.png");
+ await mock.stop();
+ });
+
+ test("base64 image response", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a cat", endpoint: "image" },
+ response: { image: { b64Json: "iVBORw0KGgo=" } },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "a cat", response_format: "b64_json" }),
+ });
+ const data = await res.json();
+ expect(data.data[0].b64_json).toBe("iVBORw0KGgo=");
+ await mock.stop();
+ });
+
+ test("Gemini Imagen endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "image" },
+ response: { image: { b64Json: "iVBORw0KGgo=" } },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1beta/models/imagen-3.0-generate-002:predict`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ instances: [{ prompt: "a guitar" }], parameters: { sampleCount: 1 } }),
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.predictions[0].bytesBase64Encoded).toBe("iVBORw0KGgo=");
+ expect(data.predictions[0].mimeType).toBe("image/png");
+ await mock.stop();
+ });
+});
+
+describe("audio transcription", () => {
+ test("transcription returns text", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { endpoint: "transcription" },
+ response: { transcription: { text: "Welcome", language: "english", duration: 2.5 } },
+ });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["fake audio"], { type: "audio/wav" }), "test.wav");
+ formData.append("model", "whisper-1");
+
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer test" },
+ body: formData,
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.text).toBe("Welcome");
+ await mock.stop();
+ });
+
+ test("verbose transcription includes words and segments", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { endpoint: "transcription" },
+ response: {
+ transcription: {
+ text: "Welcome",
+ language: "english",
+ duration: 2.5,
+ words: [{ word: "Welcome", start: 0.0, end: 0.5 }],
+ segments: [{ id: 0, text: "Welcome", start: 0.0, end: 2.5 }],
+ },
+ },
+ });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["fake audio"]), "test.wav");
+ formData.append("model", "whisper-1");
+ formData.append("response_format", "verbose_json");
+
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer test" },
+ body: formData,
+ });
+ const data = await res.json();
+ expect(data.task).toBe("transcribe");
+ expect(data.words).toHaveLength(1);
+ expect(data.segments).toHaveLength(1);
+ await mock.stop();
+ });
+});
+
+describe("video generation", () => {
+ test("video creation and status check", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "video" },
+ response: {
+ video: { id: "vid_123", status: "completed", url: "https://example.com/video.mp4" },
+ },
+ });
+ await mock.start();
+
+ // Create
+ const create = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "sora-2", prompt: "a guitar" }),
+ });
+ const job = await create.json();
+ expect(job.id).toBe("vid_123");
+ expect(job.status).toBe("completed");
+
+ // Status check
+ const status = await fetch(`${mock.url}/v1/videos/vid_123`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ const result = await status.json();
+ expect(result.status).toBe("completed");
+ expect(result.url).toBe("https://example.com/video.mp4");
+ await mock.stop();
+ });
+
+ test("video processing returns minimal response then status on GET", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "slow motion", endpoint: "video" },
+ response: {
+ video: { id: "vid_456", status: "processing", url: "https://example.com/slow.mp4" },
+ },
+ });
+ await mock.start();
+
+ const create = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "sora-2", prompt: "slow motion" }),
+ });
+ const job = await create.json();
+ expect(job.id).toBe("vid_456");
+ expect(job.status).toBe("processing");
+ expect(job.url).toBeUndefined();
+
+ const status = await fetch(`${mock.url}/v1/videos/vid_456`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ const result = await status.json();
+ expect(result.id).toBe("vid_456");
+ expect(result.status).toBe("processing");
+ await mock.stop();
+ });
+
+ test("video status 404 for unknown id", async () => {
+ const mock = new LLMock({ port: 0 });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/videos/unknown`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ expect(res.status).toBe(404);
+ await mock.stop();
+ });
+});
+
+describe("convenience methods", () => {
+ test("onImage creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onImage("sunset", { image: { url: "sunset.png" } });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ prompt: "sunset" }),
+ });
+ expect((await res.json()).data[0].url).toBe("sunset.png");
+ await mock.stop();
+ });
+
+ test("onSpeech creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onSpeech("hello", { audio: "AAAA", format: "mp3" });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ input: "hello", model: "tts-1", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ await mock.stop();
+ });
+
+ test("onTranscription creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onTranscription({ transcription: { text: "hello world" } });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["audio"]), "test.wav");
+ formData.append("model", "whisper-1");
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer t" },
+ body: formData,
+ });
+ expect((await res.json()).text).toBe("hello world");
+ await mock.stop();
+ });
+
+ test("onVideo creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onVideo("dancing", { video: { id: "v1", status: "completed", url: "dance.mp4" } });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ prompt: "dancing" }),
+ });
+ expect((await res.json()).id).toBe("v1");
+ await mock.stop();
+ });
+});
+
+describe("X-Test-Id isolation", () => {
+ test("X-Test-Id works for image endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 0 },
+ response: { image: { url: "1.png" } },
+ });
+ mock.addFixture({
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 },
+ response: { image: { url: "2.png" } },
+ });
+ await mock.start();
+
+ const req = (testId: string) =>
+ fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: "Bearer t",
+ "X-Test-Id": testId,
+ },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "g" }),
+ }).then((r) => r.json());
+
+ const [a, b] = await Promise.all([req("A"), req("B")]);
+ expect(a.data[0].url).toBe("1.png");
+ expect(b.data[0].url).toBe("1.png"); // both get sequenceIndex 0
+
+ await mock.stop();
+ });
+});
+
+describe("endpoint cross-matching prevention", () => {
+ test("image fixture does not match chat request", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "guitar", endpoint: "image" },
+ response: { image: { url: "img.png" } },
+ });
+ mock.addFixture({
+ match: { userMessage: "guitar" },
+ response: { content: "Chat about guitars" },
+ });
+ await mock.start();
+
+ // Chat request should NOT match the image fixture
+ const chat = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "guitar" }],
+ stream: false,
+ }),
+ });
+ const chatData = await chat.json();
+ expect(chatData.choices[0].message.content).toBe("Chat about guitars");
+
+ // Image request should match the image fixture
+ const img = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "guitar" }),
+ });
+ const imgData = await img.json();
+ expect(imgData.data[0].url).toBe("img.png");
+
+ await mock.stop();
+ });
+});
+
+describe("endpoint backfill on existing handlers", () => {
+ test("fixture with endpoint: chat matches chat completions", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "hello", endpoint: "chat" },
+ response: { content: "Hi there" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ stream: false,
+ }),
+ });
+ const data = await res.json();
+ expect(data.choices[0].message.content).toBe("Hi there");
+ await mock.stop();
+ });
+
+ test("fixture with endpoint: embedding matches embeddings", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { inputText: "test input", endpoint: "embedding" },
+ response: { embedding: [0.1, 0.2, 0.3] },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/embeddings`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ model: "text-embedding-3-small", input: "test input" }),
+ });
+ const data = await res.json();
+ expect(data.data[0].embedding).toEqual([0.1, 0.2, 0.3]);
+ await mock.stop();
+ });
+});
+
+describe("text-to-speech", () => {
+ test("TTS returns audio bytes with correct content-type", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "hello world", endpoint: "speech" },
+ response: { audio: "AAAA", format: "mp3" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "hello world", voice: "alloy" }),
+ });
+ expect(res.status).toBe(200);
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ const buffer = await res.arrayBuffer();
+ expect(buffer.byteLength).toBeGreaterThan(0);
+ await mock.stop();
+ });
+
+ test("TTS respects format for content-type", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test", endpoint: "speech" },
+ response: { audio: "AAAA", format: "opus" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "test", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/opus");
+ await mock.stop();
+ });
+
+ test("TTS defaults to mp3 when no format specified", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "default", endpoint: "speech" },
+ response: { audio: "AAAA" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "default", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ await mock.stop();
+ });
+});
diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts
index 552f85c..9b3469e 100644
--- a/src/bedrock-converse.ts
+++ b/src/bedrock-converse.ts
@@ -271,6 +271,7 @@ export async function handleConverse(
}
const completionReq = converseToCompletionRequest(converseReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -480,6 +481,7 @@ export async function handleConverseStream(
}
const completionReq = converseToCompletionRequest(converseReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/bedrock.ts b/src/bedrock.ts
index 169aee6..fabd86a 100644
--- a/src/bedrock.ts
+++ b/src/bedrock.ts
@@ -315,6 +315,7 @@ export async function handleBedrock(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = bedrockToCompletionRequest(bedrockReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -671,6 +672,7 @@ export async function handleBedrockStream(
}
const completionReq = bedrockToCompletionRequest(bedrockReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/cohere.ts b/src/cohere.ts
index 842b3ae..1d1dccf 100644
--- a/src/cohere.ts
+++ b/src/cohere.ts
@@ -465,6 +465,7 @@ export async function handleCohere(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = cohereToCompletionRequest(cohereReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/embeddings.ts b/src/embeddings.ts
index 1d80a9b..6d1f947 100644
--- a/src/embeddings.ts
+++ b/src/embeddings.ts
@@ -85,6 +85,7 @@ export async function handleEmbeddings(
model: embeddingReq.model,
messages: [],
embeddingInput: combinedInput,
+ _endpointType: "embedding",
};
const testId = getTestId(req);
diff --git a/src/gemini.ts b/src/gemini.ts
index 0b313dd..3c6529d 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -504,6 +504,7 @@ export async function handleGemini(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = geminiToCompletionRequest(geminiReq, model, streaming);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/helpers.ts b/src/helpers.ts
index dac9160..325ac11 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -7,6 +7,10 @@ import type {
ContentWithToolCallsResponse,
ErrorResponse,
EmbeddingResponse,
+ ImageResponse,
+ AudioResponse,
+ TranscriptionResponse,
+ VideoResponse,
SSEChunk,
ToolCall,
ChatCompletion,
@@ -74,6 +78,33 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse
return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding);
}
+export function isImageResponse(r: FixtureResponse): r is ImageResponse {
+ return (
+ ("image" in r && r.image != null) ||
+ ("images" in r && Array.isArray((r as ImageResponse).images))
+ );
+}
+
+export function isAudioResponse(r: FixtureResponse): r is AudioResponse {
+ return "audio" in r && typeof (r as AudioResponse).audio === "string";
+}
+
+export function isTranscriptionResponse(r: FixtureResponse): r is TranscriptionResponse {
+ return (
+ "transcription" in r &&
+ (r as TranscriptionResponse).transcription != null &&
+ typeof (r as TranscriptionResponse).transcription === "object"
+ );
+}
+
+export function isVideoResponse(r: FixtureResponse): r is VideoResponse {
+ return (
+ "video" in r &&
+ (r as VideoResponse).video != null &&
+ typeof (r as VideoResponse).video === "object"
+ );
+}
+
export function buildTextChunks(
content: string,
model: string,
diff --git a/src/images.ts b/src/images.ts
new file mode 100644
index 0000000..cb9de09
--- /dev/null
+++ b/src/images.ts
@@ -0,0 +1,214 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isImageResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface OpenAIImageRequest {
+ model?: string;
+ prompt: string;
+ n?: number;
+ size?: string;
+ response_format?: "url" | "b64_json";
+ [key: string]: unknown;
+}
+
+interface GeminiPredictRequest {
+ instances: Array<{ prompt: string }>;
+ parameters?: { sampleCount?: number };
+ [key: string]: unknown;
+}
+
+function buildSyntheticRequest(model: string, prompt: string): ChatCompletionRequest {
+ return {
+ model,
+ messages: [{ role: "user", content: prompt }],
+ _endpointType: "image",
+ };
+}
+
+export async function handleImages(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ format: "openai" | "gemini" = "openai",
+ geminiModel?: string,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/images/generations";
+ const method = req.method ?? "POST";
+
+ let model: string;
+ let prompt: string;
+
+ try {
+ const body = JSON.parse(raw);
+ if (format === "gemini") {
+ const geminiReq = body as GeminiPredictRequest;
+ prompt = geminiReq.instances?.[0]?.prompt ?? "";
+ model = geminiModel ?? "imagen";
+ } else {
+ const openaiReq = body as OpenAIImageRequest;
+ prompt = openaiReq.prompt ?? "";
+ model = openaiReq.model ?? "dall-e-3";
+ }
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq = buildSyntheticRequest(model, prompt);
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ format === "gemini" ? "gemini" : "openai",
+ req.url ?? "/v1/images/generations",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isImageResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not an image type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ // Normalize to array of image items
+ const items = response.images ?? (response.image ? [response.image] : []);
+
+ if (format === "gemini") {
+ const predictions = items.map((item) => ({
+ bytesBase64Encoded: item.b64Json ?? "",
+ mimeType: "image/png" as const,
+ }));
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ predictions }));
+ } else {
+ const data = items.map((item) => {
+ const entry: Record<string, string> = {};
+ if (item.url) entry.url = item.url;
+ if (item.b64Json) entry.b64_json = item.b64Json;
+ if (item.revisedPrompt) entry.revised_prompt = item.revisedPrompt;
+ return entry;
+ });
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ created: Math.floor(Date.now() / 1000), data }));
+ }
+}
diff --git a/src/index.ts b/src/index.ts
index a5e9b29..4cb0cfb 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -69,6 +69,13 @@ export { handleWebSocketResponses } from "./ws-responses.js";
export { handleWebSocketRealtime } from "./ws-realtime.js";
export { handleWebSocketGeminiLive } from "./ws-gemini-live.js";
+// Multimedia handlers
+export { handleImages } from "./images.js";
+export { handleSpeech } from "./speech.js";
+export { handleTranscription } from "./transcription.js";
+export { handleVideoCreate, handleVideoStatus } from "./video.js";
+export type { VideoStateMap } from "./video.js";
+
// Helpers
export {
flattenHeaders,
@@ -79,6 +86,10 @@ export {
buildTextChunks,
buildToolCallChunks,
isEmbeddingResponse,
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
generateDeterministicEmbedding,
buildEmbeddingResponse,
} from "./helpers.js";
@@ -249,4 +260,9 @@ export type {
ChatCompletion,
ChatCompletionChoice,
ChatCompletionMessage,
+ ImageItem,
+ ImageResponse,
+ AudioResponse,
+ TranscriptionResponse,
+ VideoResponse,
} from "./types.js";
diff --git a/src/llmock.ts b/src/llmock.ts
index 5bb532b..973be71 100644
--- a/src/llmock.ts
+++ b/src/llmock.ts
@@ -1,4 +1,5 @@
import type {
+ AudioResponse,
ChaosConfig,
EmbeddingFixtureOpts,
Fixture,
@@ -6,9 +7,12 @@ import type {
FixtureMatch,
FixtureOpts,
FixtureResponse,
+ ImageResponse,
MockServerOptions,
Mountable,
RecordConfig,
+ TranscriptionResponse,
+ VideoResponse,
} from "./types.js";
import { createServer, type ServerInstance } from "./server.js";
import {
@@ -124,6 +128,34 @@ export class LLMock {
return this.on({ toolCallId: id }, response, opts);
}
+ onImage(prompt: string | RegExp, response: ImageResponse): this {
+ return this.addFixture({
+ match: { userMessage: prompt, endpoint: "image" },
+ response,
+ });
+ }
+
+ onSpeech(input: string | RegExp, response: AudioResponse): this {
+ return this.addFixture({
+ match: { userMessage: input, endpoint: "speech" },
+ response,
+ });
+ }
+
+ onTranscription(response: TranscriptionResponse): this {
+ return this.addFixture({
+ match: { endpoint: "transcription" },
+ response,
+ });
+ }
+
+ onVideo(prompt: string | RegExp, response: VideoResponse): this {
+ return this.addFixture({
+ match: { userMessage: prompt, endpoint: "video" },
+ response,
+ });
+ }
+
// ---- Service mock convenience methods ----
onSearch(pattern: string | RegExp, results: SearchResult[]): this {
@@ -252,6 +284,7 @@ export class LLMock {
this.moderationFixtures.length = 0;
if (this.serverInstance) {
this.serverInstance.journal.clear();
+ this.serverInstance.videoStates.clear();
}
return this;
}
diff --git a/src/messages.ts b/src/messages.ts
index 9b77e85..c58d85a 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -678,6 +678,7 @@ export async function handleMessages(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = claudeToCompletionRequest(claudeReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/ollama.ts b/src/ollama.ts
index 1692054..ac0c987 100644
--- a/src/ollama.ts
+++ b/src/ollama.ts
@@ -388,6 +388,7 @@ export async function handleOllama(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = ollamaToCompletionRequest(ollamaReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -646,6 +647,7 @@ export async function handleOllamaGenerate(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = ollamaGenerateToCompletionRequest(generateReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/recorder.ts b/src/recorder.ts
index d5348a1..1be0156 100644
--- a/src/recorder.ts
+++ b/src/recorder.ts
@@ -134,7 +134,21 @@ export async function proxyAndRecord(
let fixtureResponse: FixtureResponse;
- if (collapsed) {
+ // TTS response — binary audio, not JSON
+ const isAudioResponse = ctString.toLowerCase().startsWith("audio/");
+ if (isAudioResponse && rawBuffer.length > 0) {
+ // Derive format from Content-Type (audio/mpeg→mp3, audio/opus→opus, etc.)
+ const audioFormat = ctString
+ .toLowerCase()
+ .replace("audio/", "")
+ .replace("mpeg", "mp3")
+ .split(";")[0]
+ .trim();
+ fixtureResponse = {
+ audio: rawBuffer.toString("base64"),
+ ...(audioFormat && audioFormat !== "mp3" ? { format: audioFormat } : {}),
+ };
+ } else if (collapsed) {
// Streaming response — use collapsed result
defaults.logger.warn(`Streaming response detected (${ctString}) — collapsing to fixture`);
if (collapsed.truncated) {
@@ -348,6 +362,69 @@ function buildFixtureResponse(
// Corrupted base64 or non-float32 data — fall through to error
}
}
+ // OpenAI image generation: { created, data: [{ url, b64_json, revised_prompt }] }
+ if (first.url || first.b64_json) {
+ const images = (obj.data as Array<Record<string, unknown>>).map((item) => ({
+ ...(item.url ? { url: String(item.url) } : {}),
+ ...(item.b64_json ? { b64Json: String(item.b64_json) } : {}),
+ ...(item.revised_prompt ? { revisedPrompt: String(item.revised_prompt) } : {}),
+ }));
+ if (images.length === 1) {
+ return { image: images[0] };
+ }
+ return { images };
+ }
+ }
+
+ // Gemini Imagen: { predictions: [...] }
+ if (Array.isArray(obj.predictions)) {
+ const images = (obj.predictions as Array<Record<string, unknown>>).map((p) => ({
+ ...(p.bytesBase64Encoded ? { b64Json: String(p.bytesBase64Encoded) } : {}),
+ ...(p.mimeType ? { mimeType: String(p.mimeType) } : {}),
+ }));
+ if (images.length === 1) {
+ return { image: images[0] };
+ }
+ return { images };
+ }
+
+ // OpenAI transcription: { text: "...", ... }
+ if (
+ typeof obj.text === "string" &&
+ (obj.task === "transcribe" || obj.language !== undefined || obj.duration !== undefined)
+ ) {
+ return {
+ transcription: {
+ text: obj.text as string,
+ ...(obj.language ? { language: String(obj.language) } : {}),
+ ...(obj.duration !== undefined ? { duration: Number(obj.duration) } : {}),
+ ...(Array.isArray(obj.words) ? { words: obj.words } : {}),
+ ...(Array.isArray(obj.segments) ? { segments: obj.segments } : {}),
+ },
+ };
+ }
+
+ // OpenAI video generation: { id, status, ... }
+ if (
+ typeof obj.id === "string" &&
+ typeof obj.status === "string" &&
+ (obj.status === "completed" || obj.status === "in_progress" || obj.status === "failed")
+ ) {
+ if (obj.status === "completed" && obj.url) {
+ return {
+ video: {
+ id: String(obj.id),
+ status: "completed" as const,
+ url: String(obj.url),
+ },
+ };
+ }
+ return {
+ video: {
+ id: String(obj.id),
+ status: obj.status === "failed" ? ("failed" as const) : ("processing" as const),
+ },
+ };
}
// Direct embedding: { embedding: [...] }
@@ -491,23 +568,34 @@ function buildFixtureResponse(
/**
* Derive fixture match criteria from the original request.
*/
+type EndpointType = "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
+
function buildFixtureMatch(request: ChatCompletionRequest): {
userMessage?: string;
inputText?: string;
+ endpoint?: EndpointType;
} {
+ const match: { userMessage?: string; inputText?: string; endpoint?: EndpointType } = {};
+
+ // Include endpoint type for multimedia fixtures
+ if (request._endpointType && request._endpointType !== "chat") {
+ match.endpoint = request._endpointType as EndpointType;
+ }
+
// Embedding request
if (request.embeddingInput) {
- return { inputText: request.embeddingInput };
+ match.inputText = request.embeddingInput;
+ return match;
}
- // Chat request — match on the last user message
+ // Chat/multimedia request — match on the last user message
const lastUser = getLastMessageByRole(request.messages ?? [], "user");
if (lastUser) {
const text = getTextContent(lastUser.content);
if (text) {
- return { userMessage: text };
+ match.userMessage = text;
}
}
- return {};
+ return match;
}
diff --git a/src/responses.ts b/src/responses.ts
index 126e997..7d6946e 100644
--- a/src/responses.ts
+++ b/src/responses.ts
@@ -803,6 +803,7 @@ export async function handleResponses(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = responsesToCompletionRequest(responsesReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/router.ts b/src/router.ts
index efc79c1..f235d50 100644
--- a/src/router.ts
+++ b/src/router.ts
@@ -1,4 +1,10 @@
import type { ChatCompletionRequest, ChatMessage, ContentPart, Fixture } from "./types.js";
+import {
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
+} from "./helpers.js";
export function getLastMessageByRole(messages: ChatMessage[], role: string): ChatMessage | null {
for (let i = messages.length - 1; i >= 0; i--) {
@@ -41,6 +47,26 @@ export function matchFixture(
if (!match.predicate(req)) continue;
}
+ // endpoint — bidirectional filtering:
+ // 1. If fixture has endpoint set, only match requests of that type
+ // 2. If request has _endpointType but fixture doesn't, skip fixtures
+ // whose response type is incompatible (prevents generic chat fixtures
+ // from matching image/speech/video requests and causing 500s)
+ const reqEndpoint = effective._endpointType as string | undefined;
+ if (match.endpoint !== undefined) {
+ if (match.endpoint !== reqEndpoint) continue;
+ } else if (reqEndpoint && reqEndpoint !== "chat" && reqEndpoint !== "embedding") {
+ // Fixture has no endpoint restriction but request is multimedia —
+ // only match if the response type is compatible
+ const r = fixture.response;
+ const compatible =
+ (reqEndpoint === "image" && isImageResponse(r)) ||
+ (reqEndpoint === "speech" && isAudioResponse(r)) ||
+ (reqEndpoint === "transcription" && isTranscriptionResponse(r)) ||
+ (reqEndpoint === "video" && isVideoResponse(r));
+ if (!compatible) continue;
+ }
+
// userMessage — match against the last user message content
if (match.userMessage !== undefined) {
const msg = getLastMessageByRole(effective.messages, "user");
diff --git a/src/server.ts b/src/server.ts
index 65bd499..f19f3b5 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -33,6 +33,10 @@ import { handleGemini } from "./gemini.js";
import { handleBedrock, handleBedrockStream } from "./bedrock.js";
import { handleConverse, handleConverseStream } from "./bedrock-converse.js";
import { handleEmbeddings } from "./embeddings.js";
+import { handleImages } from "./images.js";
+import { handleSpeech } from "./speech.js";
+import { handleTranscription } from "./transcription.js";
+import { handleVideoCreate, handleVideoStatus, type VideoStateMap } from "./video.js";
import { handleOllama, handleOllamaGenerate } from "./ollama.js";
import { handleCohere } from "./cohere.js";
import { handleSearch, type SearchFixture } from "./search.js";
@@ -52,6 +56,7 @@ export interface ServerInstance {
journal: Journal;
url: string;
defaults: HandlerDefaults;
+ videoStates: VideoStateMap;
}
const COMPLETIONS_PATH = "/v1/chat/completions";
@@ -65,6 +70,12 @@ const COHERE_CHAT_PATH = "/v2/chat";
const SEARCH_PATH = "/search";
const RERANK_PATH = "/v2/rerank";
const MODERATIONS_PATH = "/v1/moderations";
+const IMAGES_PATH = "/v1/images/generations";
+const SPEECH_PATH = "/v1/audio/speech";
+const TRANSCRIPTIONS_PATH = "/v1/audio/transcriptions";
+const VIDEOS_PATH = "/v1/videos";
+const VIDEOS_STATUS_RE = /^\/v1\/videos\/([^/]+)$/;
+const GEMINI_PREDICT_RE = /^\/v1beta\/models\/([^:]+):predict$/;
const DEFAULT_CHUNK_SIZE = 20;
const GEMINI_PATH_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/;
@@ -140,6 +151,7 @@ async function handleControlAPI(
pathname: string,
fixtures: Fixture[],
journal: Journal,
+ videoStates: VideoStateMap,
): Promise {
if (!pathname.startsWith(CONTROL_PREFIX)) return false;
@@ -213,6 +225,7 @@ async function handleControlAPI(
if (subPath === "/reset" && req.method === "POST") {
fixtures.length = 0;
journal.clear();
+ videoStates.clear();
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ reset: true }));
return true;
@@ -344,6 +357,7 @@ async function handleCompletions(
}
// Match fixture
+ body._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
fixtures,
@@ -633,6 +647,7 @@ export async function createServer(
}
const journal = new Journal();
+ const videoStates: VideoStateMap = new Map();
// Share journal and metrics registry with mounted services
if (mounts) {
@@ -703,7 +718,7 @@ export async function createServer(
// Control API — must be checked before mounts and path rewrites
if (pathname.startsWith(CONTROL_PREFIX)) {
- await handleControlAPI(req, res, pathname, fixtures, journal);
+ await handleControlAPI(req, res, pathname, fixtures, journal, videoStates);
return;
}
@@ -948,6 +963,136 @@ export async function createServer(
return;
}
+ // POST /v1/images/generations — OpenAI Image Generation API
+ if (pathname === IMAGES_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) => handleImages(req, res, raw, fixtures, journal, defaults, setCorsHeaders))
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/audio/speech — OpenAI TTS API
+ if (pathname === SPEECH_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) => handleSpeech(req, res, raw, fixtures, journal, defaults, setCorsHeaders))
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/audio/transcriptions — OpenAI Transcription API
+ if (pathname === TRANSCRIPTIONS_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) =>
+ handleTranscription(req, res, raw, fixtures, journal, defaults, setCorsHeaders),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/videos — Video Generation API
+ if (pathname === VIDEOS_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) =>
+ handleVideoCreate(
+ req,
+ res,
+ raw,
+ fixtures,
+ journal,
+ defaults,
+ setCorsHeaders,
+ videoStates,
+ ),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // GET /v1/videos/{id} — Video Status Check
+ const videoStatusMatch = pathname.match(VIDEOS_STATUS_RE);
+ if (videoStatusMatch && req.method === "GET") {
+ const videoId = videoStatusMatch[1];
+ handleVideoStatus(req, res, videoId, journal, setCorsHeaders, videoStates);
+ return;
+ }
+
+ // POST /v1beta/models/{model}:predict — Gemini Imagen API
+ const geminiPredictMatch = pathname.match(GEMINI_PREDICT_RE);
+ if (geminiPredictMatch && req.method === "POST") {
+ const predictModel = geminiPredictMatch[1];
+ readBody(req)
+ .then((raw) =>
+ handleImages(
+ req,
+ res,
+ raw,
+ fixtures,
+ journal,
+ defaults,
+ setCorsHeaders,
+ "gemini",
+ predictModel,
+ ),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
// POST /v1beta/models/{model}:(generateContent|streamGenerateContent) — Google Gemini
const geminiMatch = pathname.match(GEMINI_PATH_RE);
if (geminiMatch && req.method === "POST") {
@@ -1466,7 +1611,7 @@ export async function createServer(
}
}
- resolve({ server, journal, url, defaults });
+ resolve({ server, journal, url, defaults, videoStates });
});
});
}
diff --git a/src/speech.ts b/src/speech.ts
new file mode 100644
index 0000000..4245f72
--- /dev/null
+++ b/src/speech.ts
@@ -0,0 +1,186 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isAudioResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface SpeechRequest {
+ model?: string;
+ input: string;
+ voice?: string;
+ response_format?: string;
+ speed?: number;
+ [key: string]: unknown;
+}
+
+const FORMAT_TO_CONTENT_TYPE: Record<string, string> = {
+ mp3: "audio/mpeg",
+ opus: "audio/opus",
+ aac: "audio/aac",
+ flac: "audio/flac",
+ wav: "audio/wav",
+ pcm: "audio/pcm",
+};
+
+export async function handleSpeech(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/audio/speech";
+ const method = req.method ?? "POST";
+
+ let speechReq: SpeechRequest;
+ try {
+ speechReq = JSON.parse(raw) as SpeechRequest;
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq: ChatCompletionRequest = {
+ model: speechReq.model ?? "tts-1",
+ messages: [{ role: "user", content: speechReq.input }],
+ _endpointType: "speech",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/audio/speech",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isAudioResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not an audio type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const format = response.format ?? "mp3";
+ const contentType = FORMAT_TO_CONTENT_TYPE[format] ?? "audio/mpeg";
+ const audioBytes = Buffer.from(response.audio, "base64");
+
+ res.writeHead(200, { "Content-Type": contentType });
+ res.end(audioBytes);
+}
diff --git a/src/transcription.ts b/src/transcription.ts
new file mode 100644
index 0000000..affedcc
--- /dev/null
+++ b/src/transcription.ts
@@ -0,0 +1,184 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isTranscriptionResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+/**
+ * Extract a named field value from a multipart/form-data body.
+ * Lightweight parser — scans for Content-Disposition headers
+ * to find simple string field values.
+ */
+function extractFormField(raw: string, fieldName: string): string | undefined {
+ const pattern = new RegExp(
+ `Content-Disposition:\\s*form-data;[^\\r\\n]*name="${fieldName}"[^\\r\\n]*\\r\\n\\r\\n([^\\r\\n]*)`,
+ "i",
+ );
+ const match = raw.match(pattern);
+ return match?.[1];
+}
+
+export async function handleTranscription(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/audio/transcriptions";
+ const method = req.method ?? "POST";
+
+ const model = extractFormField(raw, "model") ?? "whisper-1";
+ const responseFormat = extractFormField(raw, "response_format") ?? "json";
+
+ const syntheticReq: ChatCompletionRequest = {
+ model,
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/audio/transcriptions",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: {
+ message: strictMessage,
+ type: "invalid_request_error",
+ code: "no_fixture_match",
+ },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isTranscriptionResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: {
+ message: "Fixture response is not a transcription type",
+ type: "server_error",
+ },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const t = response.transcription;
+ const useVerbose = responseFormat === "verbose_json" || t.words != null || t.segments != null;
+
+ if (useVerbose) {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: t.language ?? "english",
+ duration: t.duration ?? 0,
+ text: t.text,
+ words: t.words ?? [],
+ segments: t.segments ?? [],
+ }),
+ );
+ } else {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ text: t.text }));
+ }
+}
diff --git a/src/types.ts b/src/types.ts
index ea64d8a..4d8a3f4 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -50,6 +50,8 @@ export interface ChatCompletionRequest {
response_format?: { type: string; [key: string]: unknown };
/** Embedding input text, set by the embeddings handler for fixture matching. */
embeddingInput?: string;
+ /** Endpoint type, set by handlers for fixture endpoint filtering. */
+ _endpointType?: string;
[key: string]: unknown;
}
@@ -70,6 +72,7 @@ export interface FixtureMatch {
predicate?: (req: ChatCompletionRequest) => boolean;
/** Which occurrence of this match to respond to (0-indexed). Undefined means match any. */
sequenceIndex?: number;
+ endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
}
// Fixture response types
@@ -111,12 +114,50 @@ export interface EmbeddingResponse {
embedding: number[];
}
+export interface ImageItem {
+ url?: string;
+ b64Json?: string;
+ revisedPrompt?: string;
+}
+
+export interface ImageResponse {
+ image?: ImageItem;
+ images?: ImageItem[];
+}
+
+export interface AudioResponse {
+ audio: string;
+ format?: string;
+}
+
+export interface TranscriptionResponse {
+ transcription: {
+ text: string;
+ language?: string;
+ duration?: number;
+ words?: Array<{ word: string; start: number; end: number }>;
+ segments?: Array<{ id: number; text: string; start: number; end: number }>;
+ };
+}
+
+export interface VideoResponse {
+ video: {
+ id: string;
+ status: "processing" | "completed" | "failed";
+ url?: string;
+ };
+}
+
export type FixtureResponse =
| TextResponse
| ToolCallResponse
| ContentWithToolCallsResponse
| ErrorResponse
- | EmbeddingResponse;
+ | EmbeddingResponse
+ | ImageResponse
+ | AudioResponse
+ | TranscriptionResponse
+ | VideoResponse;
// Streaming physics
@@ -165,6 +206,7 @@ export interface FixtureFileEntry {
model?: string;
responseFormat?: string;
sequenceIndex?: number;
+ endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
// predicate not supported in JSON files
};
response: FixtureResponse;
diff --git a/src/video.ts b/src/video.ts
new file mode 100644
index 0000000..dfc4670
--- /dev/null
+++ b/src/video.ts
@@ -0,0 +1,238 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults, VideoResponse } from "./types.js";
+import { isVideoResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface VideoRequest {
+ model?: string;
+ prompt: string;
+ [key: string]: unknown;
+}
+
+/** Stored video state for GET status checks. Key: `${testId}:${videoId}` */
+export type VideoStateMap = Map<string, VideoResponse["video"]>;
+
+export async function handleVideoCreate(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ videoStates: VideoStateMap,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/videos";
+ const method = req.method ?? "POST";
+
+ let videoReq: VideoRequest;
+ try {
+ videoReq = JSON.parse(raw) as VideoRequest;
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq: ChatCompletionRequest = {
+ model: videoReq.model ?? "sora-2",
+ messages: [{ role: "user", content: videoReq.prompt }],
+ _endpointType: "video",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/videos",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isVideoResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not a video type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const video = response.video;
+ const created_at = Math.floor(Date.now() / 1000);
+
+ // Store for GET status checks
+ const stateKey = `${testId}:${video.id}`;
+ videoStates.set(stateKey, video);
+
+ if (video.status === "completed") {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: video.id, status: video.status, url: video.url, created_at }));
+ } else {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: video.id, status: video.status, created_at }));
+ }
+}
+
+export function handleVideoStatus(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ videoId: string,
+ journal: Journal,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ videoStates: VideoStateMap,
+): void {
+ setCorsHeaders(res);
+ const path = req.url ?? `/v1/videos/${videoId}`;
+ const method = req.method ?? "GET";
+
+ const testId = getTestId(req);
+ const stateKey = `${testId}:${videoId}`;
+ const video = videoStates.get(stateKey);
+
+ if (!video) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 404, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 404,
+ JSON.stringify({ error: { message: `Video ${videoId} not found`, type: "not_found" } }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 200, fixture: null },
+ });
+
+ const created_at = Math.floor(Date.now() / 1000);
+  const body: Record<string, unknown> = {
+ id: video.id,
+ status: video.status,
+ created_at,
+ };
+ if (video.url) body.url = video.url;
+
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify(body));
+}