diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65aa5b7..7397b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# @copilotkit/aimock
+## 1.12.0
+
+### Minor Changes
+
+- Multimedia endpoint support: image generation (OpenAI DALL-E + Gemini Imagen), text-to-speech, audio transcription, and video generation with async polling (#101)
+- `match.endpoint` field for fixture isolation — prevents cross-matching between chat, image, speech, transcription, video, and embedding fixtures (#101)
+- Bidirectional endpoint filtering — generic fixtures only match compatible endpoint types (#101)
+- Convenience methods: `onImage`, `onSpeech`, `onTranscription`, `onVideo` (#101)
+- Record & replay for all multimedia endpoints — proxy to real APIs, save fixtures with correct format/type detection (#101)
+- `_endpointType` explicit field on `ChatCompletionRequest` for type safety (#101)
+- Comparison matrix and drift detection rules updated for multimedia (#101)
+- 55 new tests (32 integration, 11 record/replay, 12 type/routing) (#101)
+
## 1.11.0
### Minor Changes
diff --git a/README.md b/README.md
index 3a759b5..0e6ed07 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
https://github.com/user-attachments/assets/646bf106-0320-41f2-a9b1-5090454830f3
-Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.
+Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.
## Quick Start
@@ -43,6 +43,7 @@ Run them all on one port with `npx aimock --config aimock.json`, or use the prog
- **[Record & Replay](https://aimock.copilotkit.dev/record-replay)** — Proxy real APIs, save as fixtures, replay deterministically forever
- **[11 LLM Providers](https://aimock.copilotkit.dev/docs)** — OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere — full streaming support
+- **[Multimedia APIs](https://aimock.copilotkit.dev/images)** — Image generation (DALL-E, Imagen), text-to-speech, audio transcription, video generation
- **[MCP / A2A / AG-UI / Vector](https://aimock.copilotkit.dev/mcp-mock)** — Mock every protocol your AI agents use
- **[Chaos Testing](https://aimock.copilotkit.dev/chaos-testing)** — 500 errors, malformed JSON, mid-stream disconnects at any probability
- **[Drift Detection](https://aimock.copilotkit.dev/drift-detection)** — Daily CI validation against real APIs
diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml
index 9fa1f59..1d2e733 100644
--- a/charts/aimock/Chart.yaml
+++ b/charts/aimock/Chart.yaml
@@ -3,4 +3,4 @@ name: aimock
description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector)
type: application
version: 0.1.0
-appVersion: "1.11.0"
+appVersion: "1.12.0"
diff --git a/docs/fixtures/index.html b/docs/fixtures/index.html
index 208611c..b023802 100644
--- a/docs/fixtures/index.html
+++ b/docs/fixtures/index.html
@@ -162,6 +162,26 @@
Response Types
embedding[]
Vector of numbers
+
+ Image
+ image.url or images[].url
+ Generated image URL(s) or base64 data
+
+
+ Speech
+ audio
+ Base64-encoded audio data
+
+
+ Transcription
+ transcription.text, words?, segments?
+ Transcribed text with optional timestamps
+
+
+ Video
+ video.url, video.duration?
+ Generated video URL with async polling
+
@@ -239,6 +259,10 @@ Programmatically
mock .onMessage ("hello" , { content : "Hi!" });
mock .onToolCall ("get_weather" , { content : "72F" });
mock .onEmbedding ("my text" , { embedding : [0.1 , 0.2 ] });
+mock .onImage ("sunset" , { image : { url : "https://example.com/sunset.png" } });
+mock .onSpeech ("hello" , { audio : "SGVsbG8=" });
+mock .onTranscription ("audio.mp3" , { transcription : { text : "Hello" } });
+mock .onVideo ("cats" , { video : { url : "https://example.com/cats.mp4" } });
mock .onJsonOutput ("data" , { key : "value" });
mock .onToolResult ("call_123" , { content : "Done" });
diff --git a/docs/images/index.html b/docs/images/index.html
new file mode 100644
index 0000000..8aaf28e
--- /dev/null
+++ b/docs/images/index.html
@@ -0,0 +1,286 @@
+
+
+
+
+
+ Image Generation — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Image Generation
+
+ The image generation endpoints support both OpenAI
+ POST /v1/images/generations and Gemini Imagen
+ POST /v1beta/models/{model}:predict formats. Return single or multiple images
+ as URLs or base64-encoded data.
+
+
+ Endpoints
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/images/generations
+ JSON (OpenAI)
+
+
+ POST
+ /v1beta/models/{model}:predict
+ JSON (Gemini Imagen)
+
+
+
+
+ Unit Test: Single Image URL
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns a single image URL" , async () => {
+ mock .onImage ("a sunset over mountains" , {
+ image : { url : "https://example.com/sunset.png" },
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "a sunset over mountains" ,
+ n : 1 ,
+ size : "1024x1024" ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data [0 ].url ).toBe ("https://example.com/sunset.png" );
+});
+
+
+ Unit Test: Multiple Images
+
+
+
+
it ("returns multiple images" , async () => {
+ mock .onImage ("cats" , {
+ images : [
+ { url : "https://example.com/cat1.png" },
+ { url : "https://example.com/cat2.png" },
+ ],
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "cats playing" ,
+ n : 2 ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data ).toHaveLength (2 );
+ expect (body .data [0 ].url ).toBe ("https://example.com/cat1.png" );
+ expect (body .data [1 ].url ).toBe ("https://example.com/cat2.png" );
+});
+
+
+ Unit Test: Base64 Response
+
+
+
+
it ("returns base64-encoded image" , async () => {
+ mock .onImage ("logo" , {
+ image : { b64_json : "iVBORw0KGgoAAAANSUhEUg..." },
+ });
+
+ const res = await fetch (`${mock.url}/v1/images/generations` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "dall-e-3" ,
+ prompt : "a company logo" ,
+ response_format : "b64_json" ,
+ }),
+ });
+
+ const body = await res .json ();
+ expect (body .data [0 ].b64_json ).toBeDefined ();
+});
+
+
+ Unit Test: Gemini Imagen Format
+
+
+
+
it ("handles Gemini Imagen predict endpoint" , async () => {
+ mock .onImage ("landscape" , {
+ image : { url : "https://example.com/landscape.png" },
+ });
+
+ const res = await fetch (
+ `${mock.url}/v1beta/models/imagen-3.0-generate-002:predict` ,
+ {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ instances : [{ prompt : "a beautiful landscape" }],
+ parameters : { sampleCount : 1 },
+ }),
+ }
+ );
+
+ const body = await res .json ();
+ expect (body .predictions ).toBeDefined ();
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "sunset" },
+ "response" : {
+ "image" : { "url" : "https://example.com/sunset.png" }
+ }
+ },
+ {
+ "match" : { "userMessage" : "cats" },
+ "response" : {
+ "images" : [
+ { "url" : "https://example.com/cat1.png" },
+ { "url" : "https://example.com/cat2.png" }
+ ]
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Matches the OpenAI /v1/images/generations response format:
+
+ created — Unix timestamp
+
+ data[].url — URL of the generated image (when using URL format)
+
+
+ data[].b64_json — base64-encoded image data (when using b64_json
+ format)
+
+
+ data[].revised_prompt — the prompt as revised by the model (optional)
+
+
+
+
+
+ Image fixtures use match.userMessage which maps to the
+ prompt field in the request body. The prompt matcher checks
+ for substring matches.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Recorded image fixtures capture the url or b64_json from the
+ provider response and save them to disk, so subsequent runs replay instantly without
+ hitting the real API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/index.html b/docs/index.html
index 75d3c5f..406061d 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1405,8 +1405,16 @@ Chaos Testing
-
+
🎨
+
Multimedia APIs
+
+ Image generation, text-to-speech, audio transcription, and video generation —
+ mock every multimedia endpoint with fixtures.
+
+
+
+
📊
Drift Detection
Fixtures stay accurate as providers evolve. Fixes ship before your tests break.
@@ -1539,6 +1547,38 @@
How aimock compares
✗
✓
+
+ Image generation
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Text-to-Speech
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Audio transcription
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
+
+ Video generation
+ Built-in ✓
+ ✗
+ ✗
+ ✗
+ ✗
+
Structured output / JSON mode
Built-in ✓
diff --git a/docs/sidebar.js b/docs/sidebar.js
index 5025839..3159755 100644
--- a/docs/sidebar.js
+++ b/docs/sidebar.js
@@ -26,6 +26,15 @@
{ label: "Compatible Providers", href: "/compatible-providers" },
],
},
+ {
+ title: "Multimedia",
+ links: [
+ { label: "Image Generation", href: "/images" },
+ { label: "Text-to-Speech", href: "/speech" },
+ { label: "Audio Transcription", href: "/transcription" },
+ { label: "Video Generation", href: "/video" },
+ ],
+ },
{
title: "LLM Features",
links: [
diff --git a/docs/speech/index.html b/docs/speech/index.html
new file mode 100644
index 0000000..10d389c
--- /dev/null
+++ b/docs/speech/index.html
@@ -0,0 +1,225 @@
+
+
+
+
+
+ Text-to-Speech — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Text-to-Speech
+
+ The POST /v1/audio/speech endpoint returns audio data from text input.
+ Supports multiple output formats including mp3, opus, aac, flac, wav, and pcm.
+
+
+ Endpoint
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/audio/speech
+ JSON request, binary/base64 response
+
+
+
+
+ Unit Test: Basic Speech
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns audio for text input" , async () => {
+ mock .onSpeech ("Hello world" , { audio : "SGVsbG8gd29ybGQ=" });
+
+ const res = await fetch (`${mock.url}/v1/audio/speech` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "tts-1" ,
+ input : "Hello world" ,
+ voice : "alloy" ,
+ }),
+ });
+
+ expect (res .ok ).toBe (true );
+ const body = await res .json ();
+ expect (body .audio ).toBe ("SGVsbG8gd29ybGQ=" );
+});
+
+
+ Format Options
+
+ The response_format field in the request controls the audio output format.
+ Supported values:
+
+
+
+
+ Format
+ Content-Type
+ Description
+
+
+
+
+ mp3
+ audio/mpeg
+ Default format, widely supported
+
+
+ opus
+ audio/opus
+ Low latency, good for streaming
+
+
+ aac
+ audio/aac
+ Preferred for mobile devices
+
+
+ flac
+ audio/flac
+ Lossless compression
+
+
+ wav
+ audio/wav
+ Uncompressed, no decoding overhead
+
+
+ pcm
+ audio/pcm
+ Raw samples, 24kHz 16-bit signed little-endian
+
+
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "Hello world" },
+ "response" : {
+ "audio" : "SGVsbG8gd29ybGQ="
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Returns audio data matching the requested format:
+
+ audio — base64-encoded audio data in the fixture response
+
+
+
+
+ Speech fixtures use match.userMessage which maps to the
+ input field in the request body. The matcher checks for substring matches
+ on the text to be spoken.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Binary audio from the provider is base64-encoded in the recorded fixture, with the format
+ derived from the response Content-Type header (e.g.
+ audio/mpeg for mp3). Subsequent requests replay the cached audio without
+ hitting the real API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/transcription/index.html b/docs/transcription/index.html
new file mode 100644
index 0000000..0eb0653
--- /dev/null
+++ b/docs/transcription/index.html
@@ -0,0 +1,242 @@
+
+
+
+
+
+ Audio Transcription — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Audio Transcription
+
+ The POST /v1/audio/transcriptions endpoint accepts multipart form-data audio
+ uploads and returns transcribed text. Supports both simple and verbose response formats
+ with word-level timestamps and segments.
+
+
+ Endpoint
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/audio/transcriptions
+ Multipart form-data request, JSON response
+
+
+
+
+ Unit Test: Simple Transcription
+
+ Using the programmatic API with vitest, register a fixture and assert on the response.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("returns simple transcription text" , async () => {
+ mock .onTranscription ("audio.mp3" , {
+ transcription : { text : "Hello, how are you today?" },
+ });
+
+ const form = new FormData ();
+ form .append ("file" , new Blob (["fake-audio" ]), "audio.mp3" );
+ form .append ("model" , "whisper-1" );
+
+ const res = await fetch (`${mock.url}/v1/audio/transcriptions` , {
+ method : "POST" ,
+ body : form ,
+ });
+
+ const body = await res .json ();
+ expect (body .text ).toBe ("Hello, how are you today?" );
+});
+
+
+ Unit Test: Verbose Response with Segments
+
+
+
+
it ("returns verbose transcription with words and segments" , async () => {
+ mock .onTranscription ("meeting.wav" , {
+ transcription : {
+ text : "Welcome to the meeting." ,
+ words : [
+ { word : "Welcome" , start : 0.0 , end : 0.5 },
+ { word : "to" , start : 0.5 , end : 0.7 },
+ { word : "the" , start : 0.7 , end : 0.9 },
+ { word : "meeting" , start : 0.9 , end : 1.4 },
+ ],
+ segments : [
+ { id : 0 , text : "Welcome to the meeting." , start : 0.0 , end : 1.4 },
+ ],
+ },
+ });
+
+ const form = new FormData ();
+ form .append ("file" , new Blob (["fake-audio" ]), "meeting.wav" );
+ form .append ("model" , "whisper-1" );
+ form .append ("response_format" , "verbose_json" );
+ form .append ("timestamp_granularities[]" , "word" );
+ form .append ("timestamp_granularities[]" , "segment" );
+
+ const res = await fetch (`${mock.url}/v1/audio/transcriptions` , {
+ method : "POST" ,
+ body : form ,
+ });
+
+ const body = await res .json ();
+ expect (body .text ).toBe ("Welcome to the meeting." );
+ expect (body .words ).toHaveLength (4 );
+ expect (body .segments ).toHaveLength (1 );
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "audio.mp3" },
+ "response" : {
+ "transcription" : {
+ "text" : "Hello, how are you today?"
+ }
+ }
+ }
+ ]
+}
+
+
+ Response Format
+ Matches the OpenAI /v1/audio/transcriptions response format:
+
+ Simple (default)
+
+ text — the transcribed text
+
+
+ Verbose (response_format: "verbose_json")
+
+ text — the full transcribed text
+ task — "transcribe"
+ language — detected language code
+ duration — audio duration in seconds
+
+ words[] — word-level timestamps with word,
+ start, end
+
+
+ segments[] — segment-level data with id,
+ text, start, end
+
+
+
+
+
+ Transcription requests use multipart form-data. The fixture
+ match.userMessage maps to the uploaded filename. This allows matching
+ different fixtures based on which audio file is submitted.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Recorded transcription fixtures preserve the full response including text,
+ language, duration, words, and
+ segments, so verbose-mode responses replay with complete word-level
+ timestamps intact.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/docs/video/index.html b/docs/video/index.html
new file mode 100644
index 0000000..c6f9fb6
--- /dev/null
+++ b/docs/video/index.html
@@ -0,0 +1,221 @@
+
+
+
+
+
+ Video Generation — aimock
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Video Generation
+
+ The video generation endpoints support async creation via
+ POST /v1/videos and status polling via GET /v1/videos/{id}. Mock
+ the full async polling lifecycle with deterministic responses.
+
+
+ Endpoints
+
+
+
+ Method
+ Path
+ Format
+
+
+
+
+ POST
+ /v1/videos
+ JSON (create video job)
+
+
+ GET
+ /v1/videos/{id}
+ JSON (poll status)
+
+
+
+
+ Async Polling Pattern
+
+ Video generation is asynchronous. The POST endpoint returns a job ID, and the
+ GET endpoint returns the current status. aimock simulates this by returning
+ "processing" on the first poll and "completed" with the video
+ URL on subsequent polls.
+
+
+ Unit Test: Create and Poll
+
+ Using the programmatic API with vitest, register a fixture and test the full async flow.
+
+
+
+
+
import { LLMock } from "@copilotkit/aimock" ;
+import { describe , it , expect , beforeAll , afterAll } from "vitest" ;
+
+let mock : LLMock ;
+
+beforeAll (async () => {
+ mock = new LLMock ();
+ await mock .start ();
+});
+
+afterAll (async () => {
+ await mock .stop ();
+});
+
+it ("creates a video job and polls for completion" , async () => {
+ mock .onVideo ("a cat playing piano" , {
+ video : { url : "https://example.com/cat-piano.mp4" , duration : 10 },
+ });
+
+ // Step 1: Create the video job
+ const createRes = await fetch (`${mock.url}/v1/videos` , {
+ method : "POST" ,
+ headers : { "Content-Type" : "application/json" },
+ body : JSON .stringify ({
+ model : "sora" ,
+ prompt : "a cat playing piano" ,
+ duration : 10 ,
+ }),
+ });
+
+ const createBody = await createRes .json ();
+ expect (createBody .id ).toBeDefined ();
+ expect (createBody .status ).toBe ("processing" );
+
+ // Step 2: Poll for completion
+ const pollRes = await fetch (`${mock.url}/v1/videos/${createBody.id}` );
+ const pollBody = await pollRes .json ();
+
+ expect (pollBody .status ).toBe ("completed" );
+ expect (pollBody .video .url ).toBe ("https://example.com/cat-piano.mp4" );
+ expect (pollBody .video .duration ).toBe (10 );
+});
+
+
+ JSON Fixture
+
+
+
+
{
+ "fixtures" : [
+ {
+ "match" : { "userMessage" : "cat playing piano" },
+ "response" : {
+ "video" : {
+ "url" : "https://example.com/cat-piano.mp4" ,
+ "duration" : 10
+ }
+ }
+ }
+ ]
+}
+
+
+ Response Format
+
+ Create (POST /v1/videos)
+
+ id — unique job identifier
+ status — "processing" initially
+ created — Unix timestamp
+
+
+ Poll (GET /v1/videos/{id})
+
+ id — the job identifier
+
+ status — "processing" or
+ "completed"
+
+ video.url — URL of the generated video (when completed)
+ video.duration — video duration in seconds
+
+
+
+
+ Video fixtures use match.userMessage which maps to the
+ prompt field in the creation request. The async polling pattern is handled
+ automatically by aimock.
+
+
+
+ Record & Replay
+
+ When no fixture matches an incoming request, aimock can proxy it to the real API and
+ record the response as a fixture for future replays. Enable recording with the
+ --record flag or via RecordConfig in the programmatic API.
+ Completed videos are recorded with their final URL; in-progress responses are also saved
+ so that the async polling lifecycle can be simulated on replay without hitting the real
+ API.
+
+
+
+
+
npx aimock --record --provider-openai https://api.openai.com
+
+
+
+
+
+
+
+
+
diff --git a/package.json b/package.json
index 76331bb..6ff5c19 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@copilotkit/aimock",
- "version": "1.11.0",
+ "version": "1.12.0",
"description": "Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, AG-UI event streams, vector databases, search, and more. Zero dependencies.",
"license": "MIT",
"repository": {
diff --git a/scripts/update-competitive-matrix.ts b/scripts/update-competitive-matrix.ts
index 2c20fb4..43e852e 100644
--- a/scripts/update-competitive-matrix.ts
+++ b/scripts/update-competitive-matrix.ts
@@ -72,6 +72,22 @@ const FEATURE_RULES: FeatureRule[] = [
rowLabel: "Embeddings API",
keywords: ["embedding", "/v1/embeddings", "embed"],
},
+ {
+ rowLabel: "Image generation",
+ keywords: ["image", "dall-e", "dalle", "/v1/images", "image generation", "imagen"],
+ },
+ {
+ rowLabel: "Text-to-Speech",
+ keywords: ["tts", "text-to-speech", "speech", "/v1/audio/speech", "audio generation"],
+ },
+ {
+ rowLabel: "Audio transcription",
+ keywords: ["transcription", "whisper", "/v1/audio/transcriptions", "speech-to-text", "stt"],
+ },
+ {
+ rowLabel: "Video generation",
+ keywords: ["video", "sora", "/v1/videos", "video generation"],
+ },
{
rowLabel: "Structured output / JSON mode",
keywords: ["json_object", "json_schema", "structured output", "response_format"],
diff --git a/src/__tests__/multimedia-record.test.ts b/src/__tests__/multimedia-record.test.ts
new file mode 100644
index 0000000..9f28970
--- /dev/null
+++ b/src/__tests__/multimedia-record.test.ts
@@ -0,0 +1,508 @@
+import { describe, it, expect } from "vitest";
+
+/**
+ * Unit tests for multimedia record/replay support in the recorder module.
+ *
+ * These test the internal detection logic by calling buildFixtureResponse
+ * and buildFixtureMatch indirectly through proxyAndRecord integration,
+ * as well as directly importing where possible.
+ *
+ * Since buildFixtureResponse and buildFixtureMatch are not exported,
+ * we test them via a lightweight upstream mock that returns the expected
+ * shapes, verifying the recorder produces correct fixture responses.
+ */
+
+import * as http from "node:http";
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { proxyAndRecord } from "../recorder.js";
+import type { Fixture, RecordConfig, ChatCompletionRequest } from "../types.js";
+import { Logger } from "../logger.js";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function createUpstream(
+ handler: (req: http.IncomingMessage, res: http.ServerResponse) => void,
+): Promise<{ server: http.Server; url: string }> {
+ return new Promise((resolve) => {
+ const server = http.createServer(handler);
+ server.listen(0, "127.0.0.1", () => {
+ const addr = server.address() as { port: number };
+ resolve({ server, url: `http://127.0.0.1:${addr.port}` });
+ });
+ });
+}
+
+function closeServer(server: http.Server): Promise<void> {
+ return new Promise((resolve) => server.close(() => resolve()));
+}
+
+function createMockReqRes(
+ urlPath: string,
+  headers: Record<string, string> = {},
+): { req: http.IncomingMessage; res: http.ServerResponse; getResponse: () => Promise<string> } {
+ const chunks: Buffer[] = [];
+ let statusCode = 200;
+
+ const req = {
+ method: "POST",
+ url: urlPath,
+ headers: { "content-type": "application/json", ...headers },
+ } as unknown as http.IncomingMessage;
+
+ const res = {
+ statusCode,
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    writeHead(status: number, hdrs?: Record<string, string>) {
+ statusCode = status;
+ res.statusCode = status;
+ },
+ end(data?: string | Buffer) {
+ if (data) chunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
+ },
+ setHeader() {},
+ } as unknown as http.ServerResponse;
+
+ return {
+ req,
+ res,
+ getResponse: async () => Buffer.concat(chunks).toString(),
+ };
+}
+
+function makeTmpDir(): string {
+ return fs.mkdtempSync(path.join(os.tmpdir(), "aimock-mm-record-"));
+}
+
+// ---------------------------------------------------------------------------
+// Tests: buildFixtureResponse detection via proxyAndRecord
+// ---------------------------------------------------------------------------
+
+describe("multimedia record: image response detection", () => {
+ it("detects OpenAI image generation response and saves image fixture", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ created: 1234567890,
+ data: [{ url: "https://example.com/img.png", revised_prompt: "a pretty sunset" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "sunset" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ request,
+ "openai",
+ "/v1/images/generations",
+ fixtures,
+ { record, logger },
+ );
+
+ expect(proxied).toBe(true);
+ expect(fixtures).toHaveLength(1);
+ const fixture = fixtures[0];
+ expect(fixture.match.endpoint).toBe("image");
+ expect(fixture.match.userMessage).toBe("sunset");
+
+ const response = fixture.response as { image?: { url?: string; revisedPrompt?: string } };
+ expect(response.image).toBeDefined();
+ expect(response.image!.url).toBe("https://example.com/img.png");
+ expect(response.image!.revisedPrompt).toBe("a pretty sunset");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects multi-image response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ created: 1234567890,
+ data: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "cats" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as { images?: Array<{ url?: string }> };
+ expect(response.images).toHaveLength(2);
+ expect(response.images![0].url).toBe("https://example.com/1.png");
+ expect(response.images![1].url).toBe("https://example.com/2.png");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects Gemini Imagen response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ predictions: [{ bytesBase64Encoded: "iVBORw0KGgo=", mimeType: "image/png" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "imagen",
+ messages: [{ role: "user", content: "dog" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1beta/models/imagen:predict");
+ await proxyAndRecord(req, res, request, "openai", "/v1beta/models/imagen:predict", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as { image?: { b64Json?: string } };
+ expect(response.image).toBeDefined();
+ expect(response.image!.b64Json).toBe("iVBORw0KGgo=");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: transcription response detection", () => {
+ it("detects OpenAI transcription response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: "english",
+ duration: 5.2,
+ text: "Hello world",
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "whisper-1",
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/transcriptions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as {
+ transcription?: { text: string; language?: string; duration?: number };
+ };
+ expect(response.transcription).toBeDefined();
+ expect(response.transcription!.text).toBe("Hello world");
+ expect(response.transcription!.language).toBe("english");
+ expect(response.transcription!.duration).toBe(5.2);
+ expect(fixtures[0].match.endpoint).toBe("transcription");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects transcription with words and segments", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: "english",
+ duration: 2.0,
+ text: "Hi",
+ words: [{ word: "Hi", start: 0, end: 0.5 }],
+ segments: [{ id: 0, text: "Hi", start: 0, end: 2.0 }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "whisper-1",
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/transcriptions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/transcriptions", fixtures, {
+ record,
+ logger,
+ });
+
+ const response = fixtures[0].response as {
+ transcription?: { text: string; words?: unknown[]; segments?: unknown[] };
+ };
+ expect(response.transcription!.words).toHaveLength(1);
+ expect(response.transcription!.segments).toHaveLength(1);
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: video response detection", () => {
+ it("detects completed video response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ id: "vid_abc",
+ status: "completed",
+ url: "https://example.com/video.mp4",
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "sora-2",
+ messages: [{ role: "user", content: "dancing cat" }],
+ _endpointType: "video",
+ };
+
+ const { req, res } = createMockReqRes("/v1/videos");
+ await proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as {
+ video?: { id: string; status: string; url?: string };
+ };
+ expect(response.video).toBeDefined();
+ expect(response.video!.id).toBe("vid_abc");
+ expect(response.video!.status).toBe("completed");
+ expect(response.video!.url).toBe("https://example.com/video.mp4");
+ expect(fixtures[0].match.endpoint).toBe("video");
+ expect(fixtures[0].match.userMessage).toBe("dancing cat");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("detects in-progress video response", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: "vid_456", status: "in_progress" }));
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "sora-2",
+ messages: [{ role: "user", content: "slow motion" }],
+ _endpointType: "video",
+ };
+
+ const { req, res } = createMockReqRes("/v1/videos");
+ await proxyAndRecord(req, res, request, "openai", "/v1/videos", fixtures, { record, logger });
+
+ const response = fixtures[0].response as {
+ video?: { id: string; status: string };
+ };
+ expect(response.video!.id).toBe("vid_456");
+ expect(response.video!.status).toBe("processing");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: TTS audio response detection", () => {
+ it("detects binary audio response and saves as base64", async () => {
+ const fixturePath = makeTmpDir();
+ const audioBytes = Buffer.from("fake-audio-content");
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "audio/mpeg" });
+ res.end(audioBytes);
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "tts-1",
+ messages: [{ role: "user", content: "hello world" }],
+ _endpointType: "speech",
+ };
+
+ const { req, res } = createMockReqRes("/v1/audio/speech");
+ await proxyAndRecord(req, res, request, "openai", "/v1/audio/speech", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures).toHaveLength(1);
+ const response = fixtures[0].response as { audio?: string };
+ expect(response.audio).toBe(audioBytes.toString("base64"));
+ expect(fixtures[0].match.endpoint).toBe("speech");
+ expect(fixtures[0].match.userMessage).toBe("hello world");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
+
+describe("multimedia record: buildFixtureMatch endpoint inclusion", () => {
+ it("includes endpoint for image requests", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ created: 1, data: [{ url: "x.png" }] }));
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "test" }],
+ _endpointType: "image",
+ };
+
+ const { req, res } = createMockReqRes("/v1/images/generations");
+ await proxyAndRecord(req, res, request, "openai", "/v1/images/generations", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBe("image");
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("does not include endpoint for chat requests", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ _endpointType: "chat",
+ };
+
+ const { req, res } = createMockReqRes("/v1/chat/completions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBeUndefined();
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+
+ it("does not include endpoint when _endpointType is absent", async () => {
+ const fixturePath = makeTmpDir();
+ const { server, url } = await createUpstream((_req, res) => {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ choices: [{ message: { content: "hi", role: "assistant" }, finish_reason: "stop" }],
+ }),
+ );
+ });
+
+ try {
+ const fixtures: Fixture[] = [];
+ const record: RecordConfig = { providers: { openai: url }, fixturePath };
+ const logger = new Logger("silent");
+ const request: ChatCompletionRequest = {
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ };
+
+ const { req, res } = createMockReqRes("/v1/chat/completions");
+ await proxyAndRecord(req, res, request, "openai", "/v1/chat/completions", fixtures, {
+ record,
+ logger,
+ });
+
+ expect(fixtures[0].match.endpoint).toBeUndefined();
+ } finally {
+ await closeServer(server);
+ fs.rmSync(fixturePath, { recursive: true, force: true });
+ }
+ });
+});
diff --git a/src/__tests__/multimedia-types.test.ts b/src/__tests__/multimedia-types.test.ts
new file mode 100644
index 0000000..1217ba2
--- /dev/null
+++ b/src/__tests__/multimedia-types.test.ts
@@ -0,0 +1,130 @@
+import { describe, test, expect } from "vitest";
+import {
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
+} from "../helpers.js";
+import { matchFixture } from "../router.js";
+import type { Fixture, ChatCompletionRequest, FixtureResponse } from "../types.js";
+
+describe("multimedia type guards", () => {
+ test("isImageResponse detects single image", () => {
+ const r: FixtureResponse = { image: { url: "https://example.com/img.png" } };
+ expect(isImageResponse(r)).toBe(true);
+ });
+
+ test("isImageResponse detects multiple images", () => {
+ const r: FixtureResponse = {
+ images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ };
+ expect(isImageResponse(r)).toBe(true);
+ });
+
+ test("isImageResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isImageResponse(r)).toBe(false);
+ });
+
+ test("isAudioResponse detects audio", () => {
+ const r: FixtureResponse = { audio: "AAAA", format: "mp3" };
+ expect(isAudioResponse(r)).toBe(true);
+ });
+
+ test("isAudioResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isAudioResponse(r)).toBe(false);
+ });
+
+ test("isTranscriptionResponse detects transcription", () => {
+ const r: FixtureResponse = { transcription: { text: "hello" } };
+ expect(isTranscriptionResponse(r)).toBe(true);
+ });
+
+ test("isTranscriptionResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isTranscriptionResponse(r)).toBe(false);
+ });
+
+ test("isVideoResponse detects video", () => {
+ const r: FixtureResponse = {
+ video: { id: "v1", status: "completed", url: "https://example.com/v.mp4" },
+ };
+ expect(isVideoResponse(r)).toBe(true);
+ });
+
+ test("isVideoResponse rejects text response", () => {
+ const r: FixtureResponse = { content: "hello" };
+ expect(isVideoResponse(r)).toBe(false);
+ });
+});
+
+describe("endpoint filtering in matchFixture", () => {
+ test("fixture with endpoint: image only matches image requests", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "guitar", endpoint: "image" },
+ response: { image: { url: "img.png" } },
+ },
+ ];
+ const chatReq: ChatCompletionRequest = {
+ model: "gpt-4",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "chat",
+ };
+ expect(matchFixture(fixtures, chatReq)).toBeNull();
+
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "image",
+ };
+ expect(matchFixture(fixtures, imageReq)).toBe(fixtures[0]);
+ });
+
+ test("fixture without endpoint matches chat/embedding requests but not multimedia", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "guitar" },
+ response: { content: "Chat about guitars" },
+ },
+ ];
+ // Chat requests match generic fixtures
+ const chatReq: ChatCompletionRequest = {
+ model: "gpt-4",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "chat",
+ };
+ expect(matchFixture(fixtures, chatReq)).toBe(fixtures[0]);
+
+ // Image requests do NOT match generic chat fixtures (prevents 500s)
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "guitar" }],
+ _endpointType: "image",
+ };
+ expect(matchFixture(fixtures, imageReq)).toBeNull();
+ });
+
+ test("endpoint filtering works with sequenceIndex", () => {
+ const fixtures: Fixture[] = [
+ {
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 0 },
+ response: { image: { url: "1.png" } },
+ },
+ {
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 },
+ response: { image: { url: "2.png" } },
+ },
+ ];
+ const counts = new Map();
+ const imageReq: ChatCompletionRequest = {
+ model: "dall-e-3",
+ messages: [{ role: "user", content: "g" }],
+ _endpointType: "image",
+ };
+
+ const first = matchFixture(fixtures, imageReq, counts);
+ expect(first).toBe(fixtures[0]);
+ });
+});
diff --git a/src/__tests__/multimedia.test.ts b/src/__tests__/multimedia.test.ts
new file mode 100644
index 0000000..68a1265
--- /dev/null
+++ b/src/__tests__/multimedia.test.ts
@@ -0,0 +1,447 @@
+import { describe, test, expect } from "vitest";
+import { LLMock } from "../llmock.js";
+
+describe("image generation", () => {
+ test("image generation returns fixture (OpenAI format)", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "image" },
+ response: {
+ image: { url: "https://example.com/guitar.png", revisedPrompt: "a guitar on display" },
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "a guitar", n: 1 }),
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.data[0].url).toBe("https://example.com/guitar.png");
+ expect(data.data[0].revised_prompt).toBe("a guitar on display");
+ expect(typeof data.created).toBe("number");
+ await mock.stop();
+ });
+
+ test("multiple images", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "guitars", endpoint: "image" },
+ response: {
+ images: [{ url: "https://example.com/1.png" }, { url: "https://example.com/2.png" }],
+ },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "guitars", n: 2 }),
+ });
+ const data = await res.json();
+ expect(data.data).toHaveLength(2);
+ expect(data.data[0].url).toBe("https://example.com/1.png");
+ expect(data.data[1].url).toBe("https://example.com/2.png");
+ await mock.stop();
+ });
+
+ test("base64 image response", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a cat", endpoint: "image" },
+ response: { image: { b64Json: "iVBORw0KGgo=" } },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "a cat", response_format: "b64_json" }),
+ });
+ const data = await res.json();
+ expect(data.data[0].b64_json).toBe("iVBORw0KGgo=");
+ await mock.stop();
+ });
+
+ test("Gemini Imagen endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "image" },
+ response: { image: { b64Json: "iVBORw0KGgo=" } },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1beta/models/imagen-3.0-generate-002:predict`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ instances: [{ prompt: "a guitar" }], parameters: { sampleCount: 1 } }),
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.predictions[0].bytesBase64Encoded).toBe("iVBORw0KGgo=");
+ expect(data.predictions[0].mimeType).toBe("image/png");
+ await mock.stop();
+ });
+});
+
+describe("audio transcription", () => {
+ test("transcription returns text", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { endpoint: "transcription" },
+ response: { transcription: { text: "Welcome", language: "english", duration: 2.5 } },
+ });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["fake audio"], { type: "audio/wav" }), "test.wav");
+ formData.append("model", "whisper-1");
+
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer test" },
+ body: formData,
+ });
+ expect(res.status).toBe(200);
+ const data = await res.json();
+ expect(data.text).toBe("Welcome");
+ await mock.stop();
+ });
+
+ test("verbose transcription includes words and segments", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { endpoint: "transcription" },
+ response: {
+ transcription: {
+ text: "Welcome",
+ language: "english",
+ duration: 2.5,
+ words: [{ word: "Welcome", start: 0.0, end: 0.5 }],
+ segments: [{ id: 0, text: "Welcome", start: 0.0, end: 2.5 }],
+ },
+ },
+ });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["fake audio"]), "test.wav");
+ formData.append("model", "whisper-1");
+ formData.append("response_format", "verbose_json");
+
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer test" },
+ body: formData,
+ });
+ const data = await res.json();
+ expect(data.task).toBe("transcribe");
+ expect(data.words).toHaveLength(1);
+ expect(data.segments).toHaveLength(1);
+ await mock.stop();
+ });
+});
+
+describe("video generation", () => {
+ test("video creation and status check", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "a guitar", endpoint: "video" },
+ response: {
+ video: { id: "vid_123", status: "completed", url: "https://example.com/video.mp4" },
+ },
+ });
+ await mock.start();
+
+ // Create
+ const create = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "sora-2", prompt: "a guitar" }),
+ });
+ const job = await create.json();
+ expect(job.id).toBe("vid_123");
+ expect(job.status).toBe("completed");
+
+ // Status check
+ const status = await fetch(`${mock.url}/v1/videos/vid_123`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ const result = await status.json();
+ expect(result.status).toBe("completed");
+ expect(result.url).toBe("https://example.com/video.mp4");
+ await mock.stop();
+ });
+
+ test("video processing returns minimal response then status on GET", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "slow motion", endpoint: "video" },
+ response: {
+ video: { id: "vid_456", status: "processing", url: "https://example.com/slow.mp4" },
+ },
+ });
+ await mock.start();
+
+ const create = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "sora-2", prompt: "slow motion" }),
+ });
+ const job = await create.json();
+ expect(job.id).toBe("vid_456");
+ expect(job.status).toBe("processing");
+ expect(job.url).toBeUndefined();
+
+ const status = await fetch(`${mock.url}/v1/videos/vid_456`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ const result = await status.json();
+ expect(result.id).toBe("vid_456");
+ expect(result.status).toBe("processing");
+ await mock.stop();
+ });
+
+ test("video status 404 for unknown id", async () => {
+ const mock = new LLMock({ port: 0 });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/videos/unknown`, {
+ headers: { Authorization: "Bearer test" },
+ });
+ expect(res.status).toBe(404);
+ await mock.stop();
+ });
+});
+
+describe("convenience methods", () => {
+ test("onImage creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onImage("sunset", { image: { url: "sunset.png" } });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ prompt: "sunset" }),
+ });
+ expect((await res.json()).data[0].url).toBe("sunset.png");
+ await mock.stop();
+ });
+
+ test("onSpeech creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onSpeech("hello", { audio: "AAAA", format: "mp3" });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ input: "hello", model: "tts-1", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ await mock.stop();
+ });
+
+ test("onTranscription creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onTranscription({ transcription: { text: "hello world" } });
+ await mock.start();
+
+ const formData = new FormData();
+ formData.append("file", new Blob(["audio"]), "test.wav");
+ formData.append("model", "whisper-1");
+ const res = await fetch(`${mock.url}/v1/audio/transcriptions`, {
+ method: "POST",
+ headers: { Authorization: "Bearer t" },
+ body: formData,
+ });
+ expect((await res.json()).text).toBe("hello world");
+ await mock.stop();
+ });
+
+ test("onVideo creates fixture with correct endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.onVideo("dancing", { video: { id: "v1", status: "completed", url: "dance.mp4" } });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/videos`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ prompt: "dancing" }),
+ });
+ expect((await res.json()).id).toBe("v1");
+ await mock.stop();
+ });
+});
+
+describe("X-Test-Id isolation", () => {
+ test("X-Test-Id works for image endpoint", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 0 },
+ response: { image: { url: "1.png" } },
+ });
+ mock.addFixture({
+ match: { userMessage: "g", endpoint: "image", sequenceIndex: 1 },
+ response: { image: { url: "2.png" } },
+ });
+ await mock.start();
+
+ const req = (testId: string) =>
+ fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ Authorization: "Bearer t",
+ "X-Test-Id": testId,
+ },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "g" }),
+ }).then((r) => r.json());
+
+ const [a, b] = await Promise.all([req("A"), req("B")]);
+ expect(a.data[0].url).toBe("1.png");
+ expect(b.data[0].url).toBe("1.png"); // both get sequenceIndex 0
+
+ await mock.stop();
+ });
+});
+
+describe("endpoint cross-matching prevention", () => {
+ test("image fixture does not match chat request", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "guitar", endpoint: "image" },
+ response: { image: { url: "img.png" } },
+ });
+ mock.addFixture({
+ match: { userMessage: "guitar" },
+ response: { content: "Chat about guitars" },
+ });
+ await mock.start();
+
+ // Chat request should NOT match the image fixture
+ const chat = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "guitar" }],
+ stream: false,
+ }),
+ });
+ const chatData = await chat.json();
+ expect(chatData.choices[0].message.content).toBe("Chat about guitars");
+
+ // Image request should match the image fixture
+ const img = await fetch(`${mock.url}/v1/images/generations`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ model: "dall-e-3", prompt: "guitar" }),
+ });
+ const imgData = await img.json();
+ expect(imgData.data[0].url).toBe("img.png");
+
+ await mock.stop();
+ });
+});
+
+describe("endpoint backfill on existing handlers", () => {
+ test("fixture with endpoint: chat matches chat completions", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "hello", endpoint: "chat" },
+ response: { content: "Hi there" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/chat/completions`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "user", content: "hello" }],
+ stream: false,
+ }),
+ });
+ const data = await res.json();
+ expect(data.choices[0].message.content).toBe("Hi there");
+ await mock.stop();
+ });
+
+ test("fixture with endpoint: embedding matches embeddings", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { inputText: "test input", endpoint: "embedding" },
+ response: { embedding: [0.1, 0.2, 0.3] },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/embeddings`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer t" },
+ body: JSON.stringify({ model: "text-embedding-3-small", input: "test input" }),
+ });
+ const data = await res.json();
+ expect(data.data[0].embedding).toEqual([0.1, 0.2, 0.3]);
+ await mock.stop();
+ });
+});
+
+describe("text-to-speech", () => {
+ test("TTS returns audio bytes with correct content-type", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "hello world", endpoint: "speech" },
+ response: { audio: "AAAA", format: "mp3" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "hello world", voice: "alloy" }),
+ });
+ expect(res.status).toBe(200);
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ const buffer = await res.arrayBuffer();
+ expect(buffer.byteLength).toBeGreaterThan(0);
+ await mock.stop();
+ });
+
+ test("TTS respects format for content-type", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "test", endpoint: "speech" },
+ response: { audio: "AAAA", format: "opus" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "test", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/opus");
+ await mock.stop();
+ });
+
+ test("TTS defaults to mp3 when no format specified", async () => {
+ const mock = new LLMock({ port: 0 });
+ mock.addFixture({
+ match: { userMessage: "default", endpoint: "speech" },
+ response: { audio: "AAAA" },
+ });
+ await mock.start();
+
+ const res = await fetch(`${mock.url}/v1/audio/speech`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json", Authorization: "Bearer test" },
+ body: JSON.stringify({ model: "tts-1", input: "default", voice: "alloy" }),
+ });
+ expect(res.headers.get("content-type")).toBe("audio/mpeg");
+ await mock.stop();
+ });
+});
diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts
index 552f85c..9b3469e 100644
--- a/src/bedrock-converse.ts
+++ b/src/bedrock-converse.ts
@@ -271,6 +271,7 @@ export async function handleConverse(
}
const completionReq = converseToCompletionRequest(converseReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -480,6 +481,7 @@ export async function handleConverseStream(
}
const completionReq = converseToCompletionRequest(converseReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/bedrock.ts b/src/bedrock.ts
index 169aee6..fabd86a 100644
--- a/src/bedrock.ts
+++ b/src/bedrock.ts
@@ -315,6 +315,7 @@ export async function handleBedrock(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = bedrockToCompletionRequest(bedrockReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -671,6 +672,7 @@ export async function handleBedrockStream(
}
const completionReq = bedrockToCompletionRequest(bedrockReq, modelId);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/cohere.ts b/src/cohere.ts
index 842b3ae..1d1dccf 100644
--- a/src/cohere.ts
+++ b/src/cohere.ts
@@ -465,6 +465,7 @@ export async function handleCohere(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = cohereToCompletionRequest(cohereReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/embeddings.ts b/src/embeddings.ts
index 1d80a9b..6d1f947 100644
--- a/src/embeddings.ts
+++ b/src/embeddings.ts
@@ -85,6 +85,7 @@ export async function handleEmbeddings(
model: embeddingReq.model,
messages: [],
embeddingInput: combinedInput,
+ _endpointType: "embedding",
};
const testId = getTestId(req);
diff --git a/src/gemini.ts b/src/gemini.ts
index 0b313dd..3c6529d 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -504,6 +504,7 @@ export async function handleGemini(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = geminiToCompletionRequest(geminiReq, model, streaming);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/helpers.ts b/src/helpers.ts
index dac9160..325ac11 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -7,6 +7,10 @@ import type {
ContentWithToolCallsResponse,
ErrorResponse,
EmbeddingResponse,
+ ImageResponse,
+ AudioResponse,
+ TranscriptionResponse,
+ VideoResponse,
SSEChunk,
ToolCall,
ChatCompletion,
@@ -74,6 +78,33 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse
return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding);
}
+export function isImageResponse(r: FixtureResponse): r is ImageResponse {
+ return (
+ ("image" in r && r.image != null) ||
+ ("images" in r && Array.isArray((r as ImageResponse).images))
+ );
+}
+
+export function isAudioResponse(r: FixtureResponse): r is AudioResponse {
+ return "audio" in r && typeof (r as AudioResponse).audio === "string";
+}
+
+export function isTranscriptionResponse(r: FixtureResponse): r is TranscriptionResponse {
+ return (
+ "transcription" in r &&
+ (r as TranscriptionResponse).transcription != null &&
+ typeof (r as TranscriptionResponse).transcription === "object"
+ );
+}
+
+export function isVideoResponse(r: FixtureResponse): r is VideoResponse {
+ return (
+ "video" in r &&
+ (r as VideoResponse).video != null &&
+ typeof (r as VideoResponse).video === "object"
+ );
+}
+
export function buildTextChunks(
content: string,
model: string,
diff --git a/src/images.ts b/src/images.ts
new file mode 100644
index 0000000..cb9de09
--- /dev/null
+++ b/src/images.ts
@@ -0,0 +1,214 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isImageResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface OpenAIImageRequest {
+ model?: string;
+ prompt: string;
+ n?: number;
+ size?: string;
+ response_format?: "url" | "b64_json";
+ [key: string]: unknown;
+}
+
+interface GeminiPredictRequest {
+ instances: Array<{ prompt: string }>;
+ parameters?: { sampleCount?: number };
+ [key: string]: unknown;
+}
+
+function buildSyntheticRequest(model: string, prompt: string): ChatCompletionRequest {
+ return {
+ model,
+ messages: [{ role: "user", content: prompt }],
+ _endpointType: "image",
+ };
+}
+
+export async function handleImages(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ format: "openai" | "gemini" = "openai",
+ geminiModel?: string,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/images/generations";
+ const method = req.method ?? "POST";
+
+ let model: string;
+ let prompt: string;
+
+ try {
+ const body = JSON.parse(raw);
+ if (format === "gemini") {
+ const geminiReq = body as GeminiPredictRequest;
+ prompt = geminiReq.instances?.[0]?.prompt ?? "";
+ model = geminiModel ?? "imagen";
+ } else {
+ const openaiReq = body as OpenAIImageRequest;
+ prompt = openaiReq.prompt ?? "";
+ model = openaiReq.model ?? "dall-e-3";
+ }
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq = buildSyntheticRequest(model, prompt);
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ format === "gemini" ? "gemini" : "openai",
+ req.url ?? "/v1/images/generations",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isImageResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not an image type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ // Normalize to array of image items
+ const items = response.images ?? (response.image ? [response.image] : []);
+
+ if (format === "gemini") {
+ const predictions = items.map((item) => ({
+ bytesBase64Encoded: item.b64Json ?? "",
+ mimeType: "image/png" as const,
+ }));
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ predictions }));
+ } else {
+ const data = items.map((item) => {
+ const entry: Record<string, string> = {};
+ if (item.url) entry.url = item.url;
+ if (item.b64Json) entry.b64_json = item.b64Json;
+ if (item.revisedPrompt) entry.revised_prompt = item.revisedPrompt;
+ return entry;
+ });
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ created: Math.floor(Date.now() / 1000), data }));
+ }
+}
diff --git a/src/index.ts b/src/index.ts
index a5e9b29..4cb0cfb 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -69,6 +69,13 @@ export { handleWebSocketResponses } from "./ws-responses.js";
export { handleWebSocketRealtime } from "./ws-realtime.js";
export { handleWebSocketGeminiLive } from "./ws-gemini-live.js";
+// Multimedia handlers
+export { handleImages } from "./images.js";
+export { handleSpeech } from "./speech.js";
+export { handleTranscription } from "./transcription.js";
+export { handleVideoCreate, handleVideoStatus } from "./video.js";
+export type { VideoStateMap } from "./video.js";
+
// Helpers
export {
flattenHeaders,
@@ -79,6 +86,10 @@ export {
buildTextChunks,
buildToolCallChunks,
isEmbeddingResponse,
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
generateDeterministicEmbedding,
buildEmbeddingResponse,
} from "./helpers.js";
@@ -249,4 +260,9 @@ export type {
ChatCompletion,
ChatCompletionChoice,
ChatCompletionMessage,
+ ImageItem,
+ ImageResponse,
+ AudioResponse,
+ TranscriptionResponse,
+ VideoResponse,
} from "./types.js";
diff --git a/src/llmock.ts b/src/llmock.ts
index 5bb532b..973be71 100644
--- a/src/llmock.ts
+++ b/src/llmock.ts
@@ -1,4 +1,5 @@
import type {
+ AudioResponse,
ChaosConfig,
EmbeddingFixtureOpts,
Fixture,
@@ -6,9 +7,12 @@ import type {
FixtureMatch,
FixtureOpts,
FixtureResponse,
+ ImageResponse,
MockServerOptions,
Mountable,
RecordConfig,
+ TranscriptionResponse,
+ VideoResponse,
} from "./types.js";
import { createServer, type ServerInstance } from "./server.js";
import {
@@ -124,6 +128,34 @@ export class LLMock {
return this.on({ toolCallId: id }, response, opts);
}
+ onImage(prompt: string | RegExp, response: ImageResponse): this {
+ return this.addFixture({
+ match: { userMessage: prompt, endpoint: "image" },
+ response,
+ });
+ }
+
+ onSpeech(input: string | RegExp, response: AudioResponse): this {
+ return this.addFixture({
+ match: { userMessage: input, endpoint: "speech" },
+ response,
+ });
+ }
+
+ onTranscription(response: TranscriptionResponse): this {
+ return this.addFixture({
+ match: { endpoint: "transcription" },
+ response,
+ });
+ }
+
+ onVideo(prompt: string | RegExp, response: VideoResponse): this {
+ return this.addFixture({
+ match: { userMessage: prompt, endpoint: "video" },
+ response,
+ });
+ }
+
// ---- Service mock convenience methods ----
onSearch(pattern: string | RegExp, results: SearchResult[]): this {
@@ -252,6 +284,7 @@ export class LLMock {
this.moderationFixtures.length = 0;
if (this.serverInstance) {
this.serverInstance.journal.clear();
+ this.serverInstance.videoStates.clear();
}
return this;
}
diff --git a/src/messages.ts b/src/messages.ts
index 9b77e85..c58d85a 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -678,6 +678,7 @@ export async function handleMessages(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = claudeToCompletionRequest(claudeReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/ollama.ts b/src/ollama.ts
index 1692054..ac0c987 100644
--- a/src/ollama.ts
+++ b/src/ollama.ts
@@ -388,6 +388,7 @@ export async function handleOllama(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = ollamaToCompletionRequest(ollamaReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
@@ -646,6 +647,7 @@ export async function handleOllamaGenerate(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = ollamaGenerateToCompletionRequest(generateReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/recorder.ts b/src/recorder.ts
index d5348a1..1be0156 100644
--- a/src/recorder.ts
+++ b/src/recorder.ts
@@ -134,7 +134,21 @@ export async function proxyAndRecord(
let fixtureResponse: FixtureResponse;
- if (collapsed) {
+ // TTS response — binary audio, not JSON
+ const isAudioResponse = ctString.toLowerCase().startsWith("audio/");
+ if (isAudioResponse && rawBuffer.length > 0) {
+ // Derive format from Content-Type (audio/mpeg→mp3, audio/opus→opus, etc.)
+ const audioFormat = ctString
+ .toLowerCase()
+ .replace("audio/", "")
+ .replace("mpeg", "mp3")
+ .split(";")[0]
+ .trim();
+ fixtureResponse = {
+ audio: rawBuffer.toString("base64"),
+ ...(audioFormat && audioFormat !== "mp3" ? { format: audioFormat } : {}),
+ };
+ } else if (collapsed) {
// Streaming response — use collapsed result
defaults.logger.warn(`Streaming response detected (${ctString}) — collapsing to fixture`);
if (collapsed.truncated) {
@@ -348,6 +362,69 @@ function buildFixtureResponse(
// Corrupted base64 or non-float32 data — fall through to error
}
}
+ // OpenAI image generation: { created, data: [{ url, b64_json, revised_prompt }] }
+ if (first.url || first.b64_json) {
+ const images = (obj.data as Array<Record<string, unknown>>).map((item) => ({
+ ...(item.url ? { url: String(item.url) } : {}),
+ ...(item.b64_json ? { b64Json: String(item.b64_json) } : {}),
+ ...(item.revised_prompt ? { revisedPrompt: String(item.revised_prompt) } : {}),
+ }));
+ if (images.length === 1) {
+ return { image: images[0] };
+ }
+ return { images };
+ }
+ }
+
+ // Gemini Imagen: { predictions: [...] }
+ if (Array.isArray(obj.predictions)) {
+ const images = (obj.predictions as Array<Record<string, unknown>>).map((p) => ({
+ ...(p.bytesBase64Encoded ? { b64Json: String(p.bytesBase64Encoded) } : {}),
+ ...(p.mimeType ? { mimeType: String(p.mimeType) } : {}),
+ }));
+ if (images.length === 1) {
+ return { image: images[0] };
+ }
+ return { images };
+ }
+
+ // OpenAI transcription: { text: "...", ... }
+ if (
+ typeof obj.text === "string" &&
+ (obj.task === "transcribe" || obj.language !== undefined || obj.duration !== undefined)
+ ) {
+ return {
+ transcription: {
+ text: obj.text as string,
+ ...(obj.language ? { language: String(obj.language) } : {}),
+ ...(obj.duration !== undefined ? { duration: Number(obj.duration) } : {}),
+ ...(Array.isArray(obj.words) ? { words: obj.words } : {}),
+ ...(Array.isArray(obj.segments) ? { segments: obj.segments } : {}),
+ },
+ };
+ }
+
+ // OpenAI video generation: { id, status, ... }
+ if (
+ typeof obj.id === "string" &&
+ typeof obj.status === "string" &&
+ (obj.status === "completed" || obj.status === "in_progress" || obj.status === "failed")
+ ) {
+ if (obj.status === "completed" && obj.url) {
+ return {
+ video: {
+ id: String(obj.id),
+ status: "completed" as const,
+ url: String(obj.url),
+ },
+ };
+ }
+ return {
+ video: {
+ id: String(obj.id),
+ status: obj.status === "failed" ? ("failed" as const) : ("processing" as const),
+ },
+ };
}
// Direct embedding: { embedding: [...] }
@@ -491,23 +568,34 @@ function buildFixtureResponse(
/**
* Derive fixture match criteria from the original request.
*/
+type EndpointType = "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
+
function buildFixtureMatch(request: ChatCompletionRequest): {
userMessage?: string;
inputText?: string;
+ endpoint?: EndpointType;
} {
+ const match: { userMessage?: string; inputText?: string; endpoint?: EndpointType } = {};
+
+ // Include endpoint type for multimedia fixtures
+ if (request._endpointType && request._endpointType !== "chat") {
+ match.endpoint = request._endpointType as EndpointType;
+ }
+
// Embedding request
if (request.embeddingInput) {
- return { inputText: request.embeddingInput };
+ match.inputText = request.embeddingInput;
+ return match;
}
- // Chat request — match on the last user message
+ // Chat/multimedia request — match on the last user message
const lastUser = getLastMessageByRole(request.messages ?? [], "user");
if (lastUser) {
const text = getTextContent(lastUser.content);
if (text) {
- return { userMessage: text };
+ match.userMessage = text;
}
}
- return {};
+ return match;
}
diff --git a/src/responses.ts b/src/responses.ts
index 126e997..7d6946e 100644
--- a/src/responses.ts
+++ b/src/responses.ts
@@ -803,6 +803,7 @@ export async function handleResponses(
// Convert to ChatCompletionRequest for fixture matching
const completionReq = responsesToCompletionRequest(responsesReq);
+ completionReq._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
diff --git a/src/router.ts b/src/router.ts
index efc79c1..f235d50 100644
--- a/src/router.ts
+++ b/src/router.ts
@@ -1,4 +1,10 @@
import type { ChatCompletionRequest, ChatMessage, ContentPart, Fixture } from "./types.js";
+import {
+ isImageResponse,
+ isAudioResponse,
+ isTranscriptionResponse,
+ isVideoResponse,
+} from "./helpers.js";
export function getLastMessageByRole(messages: ChatMessage[], role: string): ChatMessage | null {
for (let i = messages.length - 1; i >= 0; i--) {
@@ -41,6 +47,26 @@ export function matchFixture(
if (!match.predicate(req)) continue;
}
+ // endpoint — bidirectional filtering:
+ // 1. If fixture has endpoint set, only match requests of that type
+ // 2. If request has _endpointType but fixture doesn't, skip fixtures
+ // whose response type is incompatible (prevents generic chat fixtures
+ // from matching image/speech/video requests and causing 500s)
+ const reqEndpoint = effective._endpointType as string | undefined;
+ if (match.endpoint !== undefined) {
+ if (match.endpoint !== reqEndpoint) continue;
+ } else if (reqEndpoint && reqEndpoint !== "chat" && reqEndpoint !== "embedding") {
+ // Fixture has no endpoint restriction but request is multimedia —
+ // only match if the response type is compatible
+ const r = fixture.response;
+ const compatible =
+ (reqEndpoint === "image" && isImageResponse(r)) ||
+ (reqEndpoint === "speech" && isAudioResponse(r)) ||
+ (reqEndpoint === "transcription" && isTranscriptionResponse(r)) ||
+ (reqEndpoint === "video" && isVideoResponse(r));
+ if (!compatible) continue;
+ }
+
// userMessage — match against the last user message content
if (match.userMessage !== undefined) {
const msg = getLastMessageByRole(effective.messages, "user");
diff --git a/src/server.ts b/src/server.ts
index 65bd499..f19f3b5 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -33,6 +33,10 @@ import { handleGemini } from "./gemini.js";
import { handleBedrock, handleBedrockStream } from "./bedrock.js";
import { handleConverse, handleConverseStream } from "./bedrock-converse.js";
import { handleEmbeddings } from "./embeddings.js";
+import { handleImages } from "./images.js";
+import { handleSpeech } from "./speech.js";
+import { handleTranscription } from "./transcription.js";
+import { handleVideoCreate, handleVideoStatus, type VideoStateMap } from "./video.js";
import { handleOllama, handleOllamaGenerate } from "./ollama.js";
import { handleCohere } from "./cohere.js";
import { handleSearch, type SearchFixture } from "./search.js";
@@ -52,6 +56,7 @@ export interface ServerInstance {
journal: Journal;
url: string;
defaults: HandlerDefaults;
+ videoStates: VideoStateMap;
}
const COMPLETIONS_PATH = "/v1/chat/completions";
@@ -65,6 +70,12 @@ const COHERE_CHAT_PATH = "/v2/chat";
const SEARCH_PATH = "/search";
const RERANK_PATH = "/v2/rerank";
const MODERATIONS_PATH = "/v1/moderations";
+const IMAGES_PATH = "/v1/images/generations";
+const SPEECH_PATH = "/v1/audio/speech";
+const TRANSCRIPTIONS_PATH = "/v1/audio/transcriptions";
+const VIDEOS_PATH = "/v1/videos";
+const VIDEOS_STATUS_RE = /^\/v1\/videos\/([^/]+)$/;
+const GEMINI_PREDICT_RE = /^\/v1beta\/models\/([^:]+):predict$/;
const DEFAULT_CHUNK_SIZE = 20;
const GEMINI_PATH_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/;
@@ -140,6 +151,7 @@ async function handleControlAPI(
pathname: string,
fixtures: Fixture[],
journal: Journal,
+ videoStates: VideoStateMap,
): Promise {
if (!pathname.startsWith(CONTROL_PREFIX)) return false;
@@ -213,6 +225,7 @@ async function handleControlAPI(
if (subPath === "/reset" && req.method === "POST") {
fixtures.length = 0;
journal.clear();
+ videoStates.clear();
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ reset: true }));
return true;
@@ -344,6 +357,7 @@ async function handleCompletions(
}
// Match fixture
+ body._endpointType = "chat";
const testId = getTestId(req);
const fixture = matchFixture(
fixtures,
@@ -633,6 +647,7 @@ export async function createServer(
}
const journal = new Journal();
+ const videoStates: VideoStateMap = new Map();
// Share journal and metrics registry with mounted services
if (mounts) {
@@ -703,7 +718,7 @@ export async function createServer(
// Control API — must be checked before mounts and path rewrites
if (pathname.startsWith(CONTROL_PREFIX)) {
- await handleControlAPI(req, res, pathname, fixtures, journal);
+ await handleControlAPI(req, res, pathname, fixtures, journal, videoStates);
return;
}
@@ -948,6 +963,136 @@ export async function createServer(
return;
}
+ // POST /v1/images/generations — OpenAI Image Generation API
+ if (pathname === IMAGES_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) => handleImages(req, res, raw, fixtures, journal, defaults, setCorsHeaders))
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/audio/speech — OpenAI TTS API
+ if (pathname === SPEECH_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) => handleSpeech(req, res, raw, fixtures, journal, defaults, setCorsHeaders))
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/audio/transcriptions — OpenAI Transcription API
+ if (pathname === TRANSCRIPTIONS_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) =>
+ handleTranscription(req, res, raw, fixtures, journal, defaults, setCorsHeaders),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // POST /v1/videos — Video Generation API
+ if (pathname === VIDEOS_PATH && req.method === "POST") {
+ readBody(req)
+ .then((raw) =>
+ handleVideoCreate(
+ req,
+ res,
+ raw,
+ fixtures,
+ journal,
+ defaults,
+ setCorsHeaders,
+ videoStates,
+ ),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
+ // GET /v1/videos/{id} — Video Status Check
+ const videoStatusMatch = pathname.match(VIDEOS_STATUS_RE);
+ if (videoStatusMatch && req.method === "GET") {
+ const videoId = videoStatusMatch[1];
+ handleVideoStatus(req, res, videoId, journal, setCorsHeaders, videoStates);
+ return;
+ }
+
+ // POST /v1beta/models/{model}:predict — Gemini Imagen API
+ const geminiPredictMatch = pathname.match(GEMINI_PREDICT_RE);
+ if (geminiPredictMatch && req.method === "POST") {
+ const predictModel = geminiPredictMatch[1];
+ readBody(req)
+ .then((raw) =>
+ handleImages(
+ req,
+ res,
+ raw,
+ fixtures,
+ journal,
+ defaults,
+ setCorsHeaders,
+ "gemini",
+ predictModel,
+ ),
+ )
+ .catch((err: unknown) => {
+ const msg = err instanceof Error ? err.message : "Internal error";
+ if (!res.headersSent) {
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({ error: { message: msg, type: "server_error" } }),
+ );
+ } else if (!res.writableEnded) {
+ res.destroy();
+ }
+ });
+ return;
+ }
+
// POST /v1beta/models/{model}:(generateContent|streamGenerateContent) — Google Gemini
const geminiMatch = pathname.match(GEMINI_PATH_RE);
if (geminiMatch && req.method === "POST") {
@@ -1466,7 +1611,7 @@ export async function createServer(
}
}
- resolve({ server, journal, url, defaults });
+ resolve({ server, journal, url, defaults, videoStates });
});
});
}
diff --git a/src/speech.ts b/src/speech.ts
new file mode 100644
index 0000000..4245f72
--- /dev/null
+++ b/src/speech.ts
@@ -0,0 +1,186 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isAudioResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface SpeechRequest {
+ model?: string;
+ input: string;
+ voice?: string;
+ response_format?: string;
+ speed?: number;
+ [key: string]: unknown;
+}
+
+const FORMAT_TO_CONTENT_TYPE: Record<string, string> = {
+ mp3: "audio/mpeg",
+ opus: "audio/opus",
+ aac: "audio/aac",
+ flac: "audio/flac",
+ wav: "audio/wav",
+ pcm: "audio/pcm",
+};
+
+export async function handleSpeech(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/audio/speech";
+ const method = req.method ?? "POST";
+
+ let speechReq: SpeechRequest;
+ try {
+ speechReq = JSON.parse(raw) as SpeechRequest;
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq: ChatCompletionRequest = {
+ model: speechReq.model ?? "tts-1",
+ messages: [{ role: "user", content: speechReq.input }],
+ _endpointType: "speech",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/audio/speech",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isAudioResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not an audio type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const format = response.format ?? "mp3";
+ const contentType = FORMAT_TO_CONTENT_TYPE[format] ?? "audio/mpeg";
+ const audioBytes = Buffer.from(response.audio, "base64");
+
+ res.writeHead(200, { "Content-Type": contentType });
+ res.end(audioBytes);
+}
diff --git a/src/transcription.ts b/src/transcription.ts
new file mode 100644
index 0000000..affedcc
--- /dev/null
+++ b/src/transcription.ts
@@ -0,0 +1,184 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js";
+import { isTranscriptionResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+/**
+ * Extract a named field value from a multipart/form-data body.
+ * Lightweight parser — scans for Content-Disposition headers
+ * to find simple string field values.
+ */
+function extractFormField(raw: string, fieldName: string): string | undefined {
+ const pattern = new RegExp(
+ `Content-Disposition:\\s*form-data;[^\\r\\n]*name="${fieldName}"[^\\r\\n]*\\r\\n\\r\\n([^\\r\\n]*)`,
+ "i",
+ );
+ const match = raw.match(pattern);
+ return match?.[1];
+}
+
+export async function handleTranscription(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/audio/transcriptions";
+ const method = req.method ?? "POST";
+
+ const model = extractFormField(raw, "model") ?? "whisper-1";
+ const responseFormat = extractFormField(raw, "response_format") ?? "json";
+
+ const syntheticReq: ChatCompletionRequest = {
+ model,
+ messages: [],
+ _endpointType: "transcription",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/audio/transcriptions",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: {
+ message: strictMessage,
+ type: "invalid_request_error",
+ code: "no_fixture_match",
+ },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isTranscriptionResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: {
+ message: "Fixture response is not a transcription type",
+ type: "server_error",
+ },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const t = response.transcription;
+ const useVerbose = responseFormat === "verbose_json" || t.words != null || t.segments != null;
+
+ if (useVerbose) {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(
+ JSON.stringify({
+ task: "transcribe",
+ language: t.language ?? "english",
+ duration: t.duration ?? 0,
+ text: t.text,
+ words: t.words ?? [],
+ segments: t.segments ?? [],
+ }),
+ );
+ } else {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ text: t.text }));
+ }
+}
diff --git a/src/types.ts b/src/types.ts
index ea64d8a..4d8a3f4 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -50,6 +50,8 @@ export interface ChatCompletionRequest {
response_format?: { type: string; [key: string]: unknown };
/** Embedding input text, set by the embeddings handler for fixture matching. */
embeddingInput?: string;
+ /** Endpoint type, set by handlers for fixture endpoint filtering. */
+ _endpointType?: string;
[key: string]: unknown;
}
@@ -70,6 +72,7 @@ export interface FixtureMatch {
predicate?: (req: ChatCompletionRequest) => boolean;
/** Which occurrence of this match to respond to (0-indexed). Undefined means match any. */
sequenceIndex?: number;
+ endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
}
// Fixture response types
@@ -111,12 +114,50 @@ export interface EmbeddingResponse {
embedding: number[];
}
+export interface ImageItem {
+ url?: string;
+ b64Json?: string;
+ revisedPrompt?: string;
+}
+
+export interface ImageResponse {
+ image?: ImageItem;
+ images?: ImageItem[];
+}
+
+export interface AudioResponse {
+ audio: string;
+ format?: string;
+}
+
+export interface TranscriptionResponse {
+ transcription: {
+ text: string;
+ language?: string;
+ duration?: number;
+ words?: Array<{ word: string; start: number; end: number }>;
+ segments?: Array<{ id: number; text: string; start: number; end: number }>;
+ };
+}
+
+export interface VideoResponse {
+ video: {
+ id: string;
+ status: "processing" | "completed" | "failed";
+ url?: string;
+ };
+}
+
export type FixtureResponse =
| TextResponse
| ToolCallResponse
| ContentWithToolCallsResponse
| ErrorResponse
- | EmbeddingResponse;
+ | EmbeddingResponse
+ | ImageResponse
+ | AudioResponse
+ | TranscriptionResponse
+ | VideoResponse;
// Streaming physics
@@ -165,6 +206,7 @@ export interface FixtureFileEntry {
model?: string;
responseFormat?: string;
sequenceIndex?: number;
+ endpoint?: "chat" | "image" | "speech" | "transcription" | "video" | "embedding";
// predicate not supported in JSON files
};
response: FixtureResponse;
diff --git a/src/video.ts b/src/video.ts
new file mode 100644
index 0000000..dfc4670
--- /dev/null
+++ b/src/video.ts
@@ -0,0 +1,238 @@
+import type * as http from "node:http";
+import type { ChatCompletionRequest, Fixture, HandlerDefaults, VideoResponse } from "./types.js";
+import { isVideoResponse, isErrorResponse, flattenHeaders, getTestId } from "./helpers.js";
+import { matchFixture } from "./router.js";
+import { writeErrorResponse } from "./sse-writer.js";
+import type { Journal } from "./journal.js";
+import { applyChaos } from "./chaos.js";
+import { proxyAndRecord } from "./recorder.js";
+
+interface VideoRequest {
+ model?: string;
+ prompt: string;
+ [key: string]: unknown;
+}
+
+/** Stored video state for GET status checks. Key: `${testId}:${videoId}` */
+export type VideoStateMap = Map<string, VideoResponse["video"]>;
+
+export async function handleVideoCreate(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ raw: string,
+ fixtures: Fixture[],
+ journal: Journal,
+ defaults: HandlerDefaults,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ videoStates: VideoStateMap,
+): Promise<void> {
+ setCorsHeaders(res);
+ const path = req.url ?? "/v1/videos";
+ const method = req.method ?? "POST";
+
+ let videoReq: VideoRequest;
+ try {
+ videoReq = JSON.parse(raw) as VideoRequest;
+ } catch {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 400, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 400,
+ JSON.stringify({
+ error: { message: "Malformed JSON", type: "invalid_request_error", code: "invalid_json" },
+ }),
+ );
+ return;
+ }
+
+ const syntheticReq: ChatCompletionRequest = {
+ model: videoReq.model ?? "sora-2",
+ messages: [{ role: "user", content: videoReq.prompt }],
+ _endpointType: "video",
+ };
+
+ const testId = getTestId(req);
+ const fixture = matchFixture(
+ fixtures,
+ syntheticReq,
+ journal.getFixtureMatchCountsForTest(testId),
+ defaults.requestTransform,
+ );
+
+ if (fixture) {
+ journal.incrementFixtureMatchCount(fixture, fixtures, testId);
+ }
+
+ if (
+ applyChaos(
+ res,
+ fixture,
+ defaults.chaos,
+ req.headers,
+ journal,
+ { method, path, headers: flattenHeaders(req.headers), body: syntheticReq },
+ defaults.registry,
+ defaults.logger,
+ )
+ )
+ return;
+
+ if (!fixture) {
+ if (defaults.record) {
+ const proxied = await proxyAndRecord(
+ req,
+ res,
+ syntheticReq,
+ "openai",
+ req.url ?? "/v1/videos",
+ fixtures,
+ defaults,
+ raw,
+ );
+ if (proxied) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: res.statusCode ?? 200, fixture: null },
+ });
+ return;
+ }
+ }
+
+ const strictStatus = defaults.strict ? 503 : 404;
+ const strictMessage = defaults.strict
+ ? "Strict mode: no fixture matched"
+ : "No fixture matched";
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: strictStatus, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ strictStatus,
+ JSON.stringify({
+ error: { message: strictMessage, type: "invalid_request_error", code: "no_fixture_match" },
+ }),
+ );
+ return;
+ }
+
+ const response = fixture.response;
+
+ if (isErrorResponse(response)) {
+ const status = response.status ?? 500;
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status, fixture },
+ });
+ writeErrorResponse(res, status, JSON.stringify(response));
+ return;
+ }
+
+ if (!isVideoResponse(response)) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 500, fixture },
+ });
+ writeErrorResponse(
+ res,
+ 500,
+ JSON.stringify({
+ error: { message: "Fixture response is not a video type", type: "server_error" },
+ }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: syntheticReq,
+ response: { status: 200, fixture },
+ });
+
+ const video = response.video;
+ const created_at = Math.floor(Date.now() / 1000);
+
+ // Store for GET status checks
+ const stateKey = `${testId}:${video.id}`;
+ videoStates.set(stateKey, video);
+
+ if (video.status === "completed") {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: video.id, status: video.status, url: video.url, created_at }));
+ } else {
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify({ id: video.id, status: video.status, created_at }));
+ }
+}
+
+export function handleVideoStatus(
+ req: http.IncomingMessage,
+ res: http.ServerResponse,
+ videoId: string,
+ journal: Journal,
+ setCorsHeaders: (res: http.ServerResponse) => void,
+ videoStates: VideoStateMap,
+): void {
+ setCorsHeaders(res);
+ const path = req.url ?? `/v1/videos/${videoId}`;
+ const method = req.method ?? "GET";
+
+ const testId = getTestId(req);
+ const stateKey = `${testId}:${videoId}`;
+ const video = videoStates.get(stateKey);
+
+ if (!video) {
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 404, fixture: null },
+ });
+ writeErrorResponse(
+ res,
+ 404,
+ JSON.stringify({ error: { message: `Video ${videoId} not found`, type: "not_found" } }),
+ );
+ return;
+ }
+
+ journal.add({
+ method,
+ path,
+ headers: flattenHeaders(req.headers),
+ body: null,
+ response: { status: 200, fixture: null },
+ });
+
+ const created_at = Math.floor(Date.now() / 1000);
+  const body: Record<string, unknown> = {
+ id: video.id,
+ status: video.status,
+ created_at,
+ };
+ if (video.url) body.url = video.url;
+
+ res.writeHead(200, { "Content-Type": "application/json" });
+ res.end(JSON.stringify(body));
+}