1212import { createClient , type SanityClient } from 'next-sanity' ;
1313import { apiVersion , dataset , projectId } from '@/sanity/lib/api' ;
1414import { generateSpeechFromScript } from '@/lib/services/elevenlabs' ;
15+ import { generatePerSceneAudio } from '@/lib/services/elevenlabs' ;
16+ import type { WordTimestamp } from '@/lib/utils/audio-timestamps' ;
1517import { uploadAudioToSanity } from '@/lib/services/sanity-upload' ;
1618import { getBRollForScenes } from '@/lib/services/pexels' ;
1719import { startBothRenders } from '@/lib/services/remotion' ;
@@ -24,6 +26,11 @@ interface VideoScene {
2426 visualDescription ?: string ;
2527 bRollKeywords ?: string [ ] ;
2628 durationEstimate ?: number ;
29+ sceneType ?: string ;
30+ code ?: { snippet : string ; language : string ; highlightLines ?: number [ ] } ;
31+ list ?: { items : string [ ] ; icon ?: string } ;
32+ comparison ?: { leftLabel : string ; rightLabel : string ; rows : { left : string ; right : string } [ ] } ;
33+ mockup ?: { deviceType : string ; screenContent : string } ;
2734}
2835
2936interface VideoScript {
@@ -129,14 +136,60 @@ export async function processVideoProduction(documentId: string): Promise<void>
129136 console . log ( `[VIDEO-PIPELINE] Updating status to "audio_gen"` ) ;
130137 await updateStatus ( client , documentId , { status : 'audio_gen' } ) ;
131138
132- // Step 4: Generate speech with ElevenLabs
139+ // Step 4: Generate per-scene audio with timestamps (or fall back to a single blob)
133140 console . log ( `[VIDEO-PIPELINE] Generating TTS audio...` ) ;
134- const audioBuffer = await generateSpeechFromScript ( {
135- hook : script . hook ,
136- scenes : script . scenes ,
137- cta : script . cta ,
138- } ) ;
139- console . log ( `[VIDEO-PIPELINE] TTS audio generated: ${ audioBuffer . length } bytes` ) ;
141+ let audioBuffer : Buffer ;
142+ let audioDurationSeconds : number ;
143+ let sceneWordTimestamps : ( WordTimestamp [ ] | undefined ) [ ] = [ ] ;
144+
145+ try {
146+ console . log ( `[VIDEO-PIPELINE] Attempting per-scene audio generation with timestamps...` ) ;
147+ const perSceneResult = await generatePerSceneAudio ( {
148+ hook : script . hook ,
149+ scenes : script . scenes ,
150+ cta : script . cta ,
151+ } ) ;
152+
153+ // Concatenate all audio buffers into one combined buffer
154+ const allBuffers = [
155+ perSceneResult . hook . audioBuffer ,
156+ ...perSceneResult . scenes . map ( s => s . audioBuffer ) ,
157+ perSceneResult . cta . audioBuffer ,
158+ ] ;
159+ audioBuffer = Buffer . concat ( allBuffers ) ;
160+
161+ // Use actual duration from ElevenLabs (much more accurate than estimates)
162+ audioDurationSeconds = Math . ceil ( perSceneResult . totalDurationMs / 1000 ) ;
163+
164+ // Collect per-scene word timestamps for Remotion
165+ sceneWordTimestamps = perSceneResult . scenes . map ( s => s . wordTimestamps ) ;
166+
167+ console . log (
168+ `[VIDEO-PIPELINE] Per-scene audio generated: ${ allBuffers . length } segments, ` +
169+ `${ audioBuffer . length } bytes, ${ audioDurationSeconds } s total`
170+ ) ;
171+ } catch ( perSceneError ) {
172+ console . warn (
173+ `[VIDEO-PIPELINE] Per-scene audio failed, falling back to single blob: ` +
174+ `${ perSceneError instanceof Error ? perSceneError . message : String ( perSceneError ) } `
175+ ) ;
176+
177+ // Fallback: single blob without timestamps
178+ audioBuffer = await generateSpeechFromScript ( {
179+ hook : script . hook ,
180+ scenes : script . scenes ,
181+ cta : script . cta ,
182+ } ) ;
183+
184+ // Estimate duration from scene estimates, plus ~5s for hook and ~5s for CTA (existing behavior)
185+ const estimatedDurationFromScenes = script . scenes . reduce (
186+ ( sum , s ) => sum + ( s . durationEstimate || 15 ) ,
187+ 0
188+ ) ;
189+ audioDurationSeconds = estimatedDurationFromScenes + 10 ;
190+ }
191+
192+ console . log ( `[VIDEO-PIPELINE] TTS audio: ${ audioBuffer . length } bytes, ${ audioDurationSeconds } s` ) ;
140193
141194 // Step 5: Upload audio to Sanity
142195 console . log ( `[VIDEO-PIPELINE] Uploading audio to Sanity...` ) ;
@@ -167,16 +220,7 @@ export async function processVideoProduction(documentId: string): Promise<void>
167220 bRollUrls [ sceneIndex ] = clip . videoUrl ;
168221 } ) ;
169222
170- // Step 8: Calculate audio duration from scene estimates (or estimate from buffer)
171- const estimatedDurationFromScenes = script . scenes . reduce (
172- ( sum , s ) => sum + ( s . durationEstimate || 15 ) ,
173- 0
174- ) ;
175- // Add ~5s for hook and ~5s for CTA
176- const audioDurationSeconds = estimatedDurationFromScenes + 10 ;
177- console . log ( `[VIDEO-PIPELINE] Estimated audio duration: ${ audioDurationSeconds } s` ) ;
178-
179- // Step 9: Fetch sponsor data if sponsorSlot is set
223+ // Step 8: Fetch sponsor data if sponsorSlot is set
180224 let sponsor : { name : string ; logoUrl ?: string ; message ?: string } | undefined ;
181225 if ( doc . sponsorSlot ?. _ref ) {
182226 console . log ( `[VIDEO-PIPELINE] Fetching sponsor data: ${ doc . sponsorSlot . _ref } ` ) ;
@@ -190,13 +234,16 @@ export async function processVideoProduction(documentId: string): Promise<void>
190234 }
191235 }
192236
193- // Step 10 : Start Remotion renders for both formats (no polling — returns immediately)
237+ // Step 9 : Start Remotion renders for both formats (no polling — returns immediately)
194238 console . log ( `[VIDEO-PIPELINE] Starting Remotion renders (main + short)...` ) ;
195239 const renderResults = await startBothRenders ( {
196240 audioUrl,
197241 script : {
198242 hook : script . hook ,
199- scenes : script . scenes ,
243+ scenes : script . scenes . map ( ( s , i ) => ( {
244+ ...s ,
245+ wordTimestamps : sceneWordTimestamps [ i ] ,
246+ } ) ) ,
200247 cta : script . cta ,
201248 } ,
202249 bRollUrls,
@@ -207,7 +254,7 @@ export async function processVideoProduction(documentId: string): Promise<void>
207254 `[VIDEO-PIPELINE] Renders started — mainRenderId: ${ renderResults . mainRenderId } , shortRenderId: ${ renderResults . shortRenderId } `
208255 ) ;
209256
210- // Step 11 : Store render IDs and set status to "rendering"
257+ // Step 10 : Store render IDs and set status to "rendering"
211258 // The check-renders cron will poll for completion, download, upload, and set video_gen.
212259 console . log ( `[VIDEO-PIPELINE] Storing render IDs and setting status to "rendering"` ) ;
213260 await updateStatus ( client , documentId , {
0 commit comments