@@ -645,56 +645,21 @@ async function generateCaptionsFromVTT(params: {
645645 const captions : CaptionData [ ] = [ ] ;
646646 const maxTimeGap = MAX_TIME_GAP_SECONDS ;
647647
648- // First, generate all word groups from all segments
649- const allWordGroups : Array < Array < TimedWord > > = [ ] ;
648+ // Generate word groups from segments and create captions directly
650649 syntheticSegments . forEach ( ( segment ) => {
651650 const words = segment . words ;
652651 const sentenceLength = segment . transcript . length ;
653652 const wordGroups = groupWordsByChars ( words , sentenceLength , maxChars , maxTimeGap ) ;
654653
654+ // Create captions directly from word groups
655655 wordGroups . forEach ( group => {
656- allWordGroups . push ( group ) ;
657- } ) ;
658- } ) ;
659-
660- // Now process each word group with context from previous/next word groups
661- allWordGroups . forEach ( ( currentGroup , groupIndex ) => {
662- const currentText = currentGroup . map ( w => w . word ) . join ( ' ' ) ;
663-
664- // Get previous word group text if available and within time gap
665- let previousText : string | null = null ;
666- if ( groupIndex > 0 ) {
667- const previousGroup = allWordGroups [ groupIndex - 1 ] ;
668- const timeGap = currentGroup [ 0 ] . start - previousGroup [ previousGroup . length - 1 ] . end ;
669- if ( timeGap < maxTimeGap ) {
670- previousText = previousGroup . map ( w => w . word ) . join ( ' ' ) ;
671- }
672- }
673-
674- // Get next word group text if available and within time gap
675- let nextText : string | null = null ;
676- if ( groupIndex < allWordGroups . length - 1 ) {
677- const nextGroup = allWordGroups [ groupIndex + 1 ] ;
678- const timeGap = nextGroup [ 0 ] . start - currentGroup [ currentGroup . length - 1 ] . end ;
679- if ( timeGap < maxTimeGap ) {
680- nextText = nextGroup . map ( w => w . word ) . join ( ' ' ) ;
656+ if ( group . length > 0 ) {
657+ captions . push ( {
658+ startTime : group [ 0 ] . start ,
659+ endTime : group [ group . length - 1 ] . end ,
660+ text : group . map ( w => w . word ) . join ( ' ' )
661+ } ) ;
681662 }
682- }
683-
684- // Build caption text with context
685- let text = currentText ;
686- if ( previousText || nextText ) {
687- const contextLines : string [ ] = [ ] ;
688- // if (previousText) contextLines.push(previousText);
689- contextLines . push ( currentText ) ;
690- // if (nextText) contextLines.push(nextText);
691- text = contextLines . join ( '\n' ) ;
692- }
693-
694- captions . push ( {
695- startTime : currentGroup [ 0 ] . start ,
696- endTime : currentGroup [ currentGroup . length - 1 ] . end ,
697- text
698663 } ) ;
699664 } ) ;
700665
0 commit comments