Skip to content

Commit cf19b8a

Browse files
feat(e2e): real media generation e2e tests (#447)
* feat(e2e): add media generation infrastructure - Update aimock to latest (multimodal endpoint support) - Add video-gen to Feature type, ALL_FEATURES, and feature configs - Fix feature-support provider sets: image-gen (openai, grok), tts/transcription/video-gen (openai only). Remove fake chat-routed sets - Add media-providers.ts with adapter factories for all 4 media types (image, tts, transcription, video) following the existing providers.ts pattern * feat(e2e): add media generation API routes and server functions - Rewrite api.image.ts, api.tts.ts, api.transcription.ts to use media-providers adapters with testId/aimockPort support - Create HTTP stream variants (api.*.stream.ts) using toHttpResponse - Create api.video.ts + api.video.stream.ts for video generation - All routes read from body.data (connection adapter format) - Add server-functions.ts with createServerFn wrappers for fetcher transport mode. Video fetcher handles full create+poll lifecycle. * feat(e2e): add media generation UI and fixtures - Create ImageGenUI, TTSUI, TranscriptionUI, VideoGenUI components with mode prop (sse | http-stream | fetcher) for transport selection - Wire media features into $feature.tsx with MEDIA_FEATURES set - Rewrite fixtures: image-gen uses data URI url, tts uses base64 audio, transcription uses audio.mpeg filename match - Register transcription/video fixtures programmatically in global-setup (aimock JSON loader doesn't set match.endpoint needed by these routes) - Video fixture includes id + status for OpenAI SDK compatibility * feat(e2e): add media generation e2e test specs - Add mode param to featureUrl helper for transport selection - Add fillPrompt, fillTextInput, clickGenerate, waitForGenerationComplete helpers with React hydration wait and pressSequentially fallback - Rewrite image-gen, tts, transcription specs with 3 transport modes (sse, http-stream, fetcher) per provider - Create video-gen spec with 3 transport modes (60s timeout for polling) * ci: apply 
automated fixes * fix(e2e): fix CI failures in global-setup and hydration timing - Replace allowlist approach in global-setup with exclusion list to avoid missing fixture directories (caused one-shot-text and text-tool-text failures in CI) - Fix clickGenerate helper: wait for networkidle then verify the click actually triggered React by checking status leaves idle, with retry if hydration wasn't complete (fixes transcription idle state issue) * ci: apply automated fixes * ci: apply automated fixes --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent db5c3d3 commit cf19b8a

31 files changed

Lines changed: 1432 additions & 217 deletions

pnpm-lock.yaml

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

testing/e2e/fixtures/image-gen/basic.json

Lines changed: 4 additions & 2 deletions
Large diffs are not rendered by default.

testing/e2e/fixtures/transcription/basic.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
{
22
"fixtures": [
33
{
4-
"match": { "userMessage": "[transcription] transcribe the audio clip" },
4+
"match": { "userMessage": "audio.mpeg" },
55
"response": {
6-
"content": "Transcription result: I would like to buy a Fender Stratocaster please. The audio is clear with no background noise."
6+
"transcription": {
7+
"text": "I would like to buy a Fender Stratocaster please"
8+
}
79
}
810
}
911
]

testing/e2e/fixtures/tts/basic.json

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
{
22
"fixtures": [
33
{
4-
"match": {
5-
"userMessage": "[tts] generate speech for welcome to the guitar store"
6-
},
4+
"match": { "userMessage": "welcome to the guitar store" },
75
"response": {
8-
"content": "Audio generated successfully. The text 'Welcome to the guitar store' has been converted to speech using a warm, professional voice."
6+
"audio": "SGVsbG8gd29ybGQ="
97
}
108
}
119
]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"fixtures": [
3+
{
4+
"match": { "userMessage": "a guitar being played in a store" },
5+
"response": {
6+
"video": {
7+
"url": "https://example.com/guitar-store.mp4",
8+
"duration": 10
9+
}
10+
}
11+
}
12+
]
13+
}

testing/e2e/global-setup.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,51 @@ import { fileURLToPath } from 'url'
66
const __filename = fileURLToPath(import.meta.url)
77
const __dirname = path.dirname(__filename)
88

9+
/**
10+
* Directories to skip when loading JSON fixtures.
11+
* - 'recorded' is for record-mode output
12+
* - 'video-gen' uses programmatic registration (needs match.endpoint)
13+
*/
14+
const SKIP_FIXTURE_DIRS = new Set(['recorded', 'video-gen'])
15+
916
export default async function globalSetup() {
1017
const mock = new LLMock({ port: 4010, host: '127.0.0.1', logLevel: 'info' })
1118

19+
// Load all JSON fixture directories (except skipped ones)
1220
const fixturesDir = path.resolve(__dirname, 'fixtures')
1321
const entries = fs.readdirSync(fixturesDir, { withFileTypes: true })
1422
for (const entry of entries) {
15-
if (entry.isDirectory() && entry.name !== 'recorded') {
23+
if (entry.isDirectory() && !SKIP_FIXTURE_DIRS.has(entry.name)) {
1624
await mock.loadFixtureDir(path.join(fixturesDir, entry.name))
1725
}
1826
}
1927

28+
// Register media fixtures programmatically (require match.endpoint)
29+
registerMediaFixtures(mock)
30+
2031
await mock.start()
2132
console.log(`[aimock] started on port 4010`)
2233
;(globalThis as any).__aimock = mock
2334
}
35+
36+
function registerMediaFixtures(mock: LLMock) {
37+
// Transcription: onTranscription sets match.endpoint = "transcription"
38+
mock.onTranscription({
39+
transcription: {
40+
text: 'I would like to buy a Fender Stratocaster please',
41+
},
42+
})
43+
44+
// Video: onVideo sets match.endpoint = "video"
45+
// id + status are required for the OpenAI SDK's videos API to work:
46+
// - POST /v1/videos reads response.id for the job ID
47+
// - GET /v1/videos/{id} reads response.status to determine completion
48+
mock.onVideo('a guitar being played in a store', {
49+
video: {
50+
url: 'https://example.com/guitar-store.mp4',
51+
duration: 10,
52+
id: 'video-job-e2e',
53+
status: 'completed',
54+
},
55+
})
56+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import { useState } from 'react'
2+
import {
3+
useGenerateImage,
4+
fetchServerSentEvents,
5+
fetchHttpStream,
6+
} from '@tanstack/ai-react'
7+
import { generateImageFn } from '@/lib/server-functions'
8+
import type { ImageGenerationResult } from '@tanstack/ai'
9+
import type { Mode, Provider } from '@/lib/types'
10+
11+
interface ImageGenUIProps {
12+
provider: Provider
13+
mode: Mode
14+
testId?: string
15+
aimockPort?: number
16+
}
17+
18+
export function ImageGenUI({
19+
provider,
20+
mode,
21+
testId,
22+
aimockPort,
23+
}: ImageGenUIProps) {
24+
const [prompt, setPrompt] = useState('')
25+
26+
const connectionOptions = () => {
27+
const body = { provider, numberOfImages: 1, testId, aimockPort }
28+
29+
if (mode === 'sse') {
30+
return { connection: fetchServerSentEvents('/api/image'), body }
31+
}
32+
if (mode === 'http-stream') {
33+
return { connection: fetchHttpStream('/api/image/stream'), body }
34+
}
35+
return {
36+
fetcher: async (input: { prompt: string }) => {
37+
return generateImageFn({
38+
data: {
39+
prompt: input.prompt,
40+
provider,
41+
numberOfImages: 1,
42+
aimockPort,
43+
testId,
44+
},
45+
}) as Promise<ImageGenerationResult>
46+
},
47+
}
48+
}
49+
50+
const { generate, result, isLoading, error, status } =
51+
useGenerateImage(connectionOptions())
52+
53+
return (
54+
<div className="p-4 space-y-4">
55+
<div className="flex gap-2">
56+
<input
57+
data-testid="prompt-input"
58+
type="text"
59+
value={prompt}
60+
onChange={(e) => setPrompt(e.target.value)}
61+
placeholder="Describe the image..."
62+
className="flex-1 bg-gray-800 border border-gray-700 rounded px-3 py-2 text-sm"
63+
/>
64+
<button
65+
data-testid="generate-button"
66+
onClick={() => generate({ prompt })}
67+
disabled={!prompt.trim() || isLoading}
68+
className="px-4 py-2 bg-orange-500 text-white rounded text-sm font-medium disabled:opacity-50"
69+
>
70+
Generate
71+
</button>
72+
</div>
73+
<div data-testid="generation-status">
74+
{status === 'idle'
75+
? 'idle'
76+
: isLoading
77+
? 'loading'
78+
: error
79+
? 'error'
80+
: result
81+
? 'complete'
82+
: 'idle'}
83+
</div>
84+
{error && (
85+
<div data-testid="generation-error" className="text-red-400 text-sm">
86+
{error.message}
87+
</div>
88+
)}
89+
{result && (
90+
<div className="grid grid-cols-2 gap-4">
91+
{result.images.map((img, i) => (
92+
<img
93+
key={i}
94+
data-testid="generated-image"
95+
src={img.url || `data:image/png;base64,${img.b64Json}`}
96+
alt={`Generated ${i + 1}`}
97+
className="rounded border border-gray-700"
98+
/>
99+
))}
100+
</div>
101+
)}
102+
</div>
103+
)
104+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { useState } from 'react'
2+
import {
3+
useGenerateSpeech,
4+
fetchServerSentEvents,
5+
fetchHttpStream,
6+
} from '@tanstack/ai-react'
7+
import { generateSpeechFn } from '@/lib/server-functions'
8+
import type { TTSResult } from '@tanstack/ai'
9+
import type { Mode, Provider } from '@/lib/types'
10+
11+
interface TTSUIProps {
12+
provider: Provider
13+
mode: Mode
14+
testId?: string
15+
aimockPort?: number
16+
}
17+
18+
export function TTSUI({ provider, mode, testId, aimockPort }: TTSUIProps) {
19+
const [text, setText] = useState('')
20+
21+
const connectionOptions = () => {
22+
const body = { provider, testId, aimockPort }
23+
24+
if (mode === 'sse') {
25+
return { connection: fetchServerSentEvents('/api/tts'), body }
26+
}
27+
if (mode === 'http-stream') {
28+
return { connection: fetchHttpStream('/api/tts/stream'), body }
29+
}
30+
return {
31+
fetcher: async (input: { text: string; voice?: string }) => {
32+
return generateSpeechFn({
33+
data: {
34+
text: input.text,
35+
voice: input.voice,
36+
provider,
37+
aimockPort,
38+
testId,
39+
},
40+
}) as Promise<TTSResult>
41+
},
42+
}
43+
}
44+
45+
const { generate, result, isLoading, error, status } =
46+
useGenerateSpeech(connectionOptions())
47+
48+
return (
49+
<div className="p-4 space-y-4">
50+
<div className="flex gap-2">
51+
<input
52+
data-testid="text-input"
53+
type="text"
54+
value={text}
55+
onChange={(e) => setText(e.target.value)}
56+
placeholder="Text to speak..."
57+
className="flex-1 bg-gray-800 border border-gray-700 rounded px-3 py-2 text-sm"
58+
/>
59+
<button
60+
data-testid="generate-button"
61+
onClick={() => generate({ text })}
62+
disabled={!text.trim() || isLoading}
63+
className="px-4 py-2 bg-orange-500 text-white rounded text-sm font-medium disabled:opacity-50"
64+
>
65+
Generate
66+
</button>
67+
</div>
68+
<div data-testid="generation-status">
69+
{status === 'idle'
70+
? 'idle'
71+
: isLoading
72+
? 'loading'
73+
: error
74+
? 'error'
75+
: result
76+
? 'complete'
77+
: 'idle'}
78+
</div>
79+
{error && (
80+
<div data-testid="generation-error" className="text-red-400 text-sm">
81+
{error.message}
82+
</div>
83+
)}
84+
{result && (
85+
<audio
86+
data-testid="generated-audio"
87+
src={`data:audio/${result.format || 'mp3'};base64,${result.audio}`}
88+
controls
89+
/>
90+
)}
91+
</div>
92+
)
93+
}

0 commit comments

Comments (0)