@@ -7,7 +7,7 @@ import type {
77} from "@hypr/plugin-transcription" ;
88
99import type { BatchPersistCallback } from "./transcript" ;
10- import { transformWordEntries } from "./utils" ;
10+ import { transformWordEntries , type WordEntry } from "./utils" ;
1111
1212import { type RuntimeSpeakerHint , type WordLike } from "~/stt/segment" ;
1313
@@ -39,7 +39,7 @@ export type BatchState = {
3939export type BatchActions = {
4040 handleBatchStarted : ( sessionId : string , phase ?: BatchPhase ) => void ;
4141 handleBatchCompleted : ( sessionId : string ) => void ;
42- handleBatchResponse : ( sessionId : string , response : BatchResponse ) => void ;
42+ handleBatchResponse : ( sessionId : string , response : BatchResponse ) => boolean ;
4343 handleBatchResponseStreamed : (
4444 sessionId : string ,
4545 event : BatchStreamEvent ,
@@ -57,6 +57,9 @@ export type BatchActions = {
5757 clearBatchPersist : ( sessionId : string ) => void ;
5858} ;
5959
// Exported message text stating that no speech was detected in the audio.
// NOTE(review): presumably surfaced by callers when a batch response yields no
// words (handleBatchResponse returning false) — usage is not visible here; confirm.
60+ export const EMPTY_BATCH_TRANSCRIPT_ERROR =
61+ "No speech was detected in the audio." ;
62+
6063export const createBatchSlice = < T extends BatchState > (
6164 set : StoreApi < T > [ "setState" ] ,
6265 get : StoreApi < T > [ "getState" ] ,
@@ -112,7 +115,7 @@ export const createBatchSlice = <T extends BatchState>(
112115
113116 const [ words , hints ] = transformBatch ( response ) ;
114117 if ( ! words . length ) {
115- return ;
118+ return false ;
116119 }
117120
118121 persist ?.( words , hints , { mode : "replace" } ) ;
@@ -130,6 +133,8 @@ export const createBatchSlice = <T extends BatchState>(
130133 batchPreview : restPreview ,
131134 } ;
132135 } ) ;
136+
137+ return true ;
133138 } ,
134139
135140 handleBatchResponseStreamed : ( sessionId , event ) => {
@@ -284,13 +289,22 @@ function transformBatch(
284289
285290 response . results . channels . forEach ( ( channel , channelIndex ) => {
286291 const alternative = channel . alternatives [ 0 ] ;
287- if ( ! alternative || ! alternative . words || ! alternative . words . length ) {
292+ if ( ! alternative ) {
288293 return ;
289294 }
290295
291- const [ words , hints ] = transformWordEntries (
296+ const wordEntries = wordEntriesFromTranscript (
292297 alternative . words ,
293298 alternative . transcript ,
299+ {
300+ channel : channelIndex ,
301+ durationSeconds : getBatchDurationSeconds ( response ) ,
302+ } ,
303+ ) ;
304+
305+ const [ words , hints ] = transformWordEntries (
306+ wordEntries ,
307+ alternative . transcript ,
294308 channelIndex ,
295309 ) ;
296310
@@ -357,9 +371,19 @@ function mergeBatchPreview(
357371 return preview ;
358372 }
359373
360- const [ incomingWords , incomingHints ] = transformWordEntries (
374+ const wordEntries = wordEntriesFromTranscript (
361375 alternative . words ,
362376 alternative . transcript ,
377+ {
378+ channel : channelIndex ,
379+ startSeconds : response . start ,
380+ durationSeconds : response . duration ,
381+ } ,
382+ ) ;
383+
384+ const [ incomingWords , incomingHints ] = transformWordEntries (
385+ wordEntries ,
386+ alternative . transcript ,
363387 channelIndex ,
364388 ) ;
365389 if ( incomingWords . length === 0 ) {
@@ -440,3 +464,56 @@ function getBatchStreamPercentage(event: BatchStreamEvent): number {
440464 return 0 ;
441465 }
442466}
467+
/**
 * Returns the word entries for a transcript alternative, synthesizing them
 * from the plain transcript text when no word-level entries were supplied.
 *
 * When `entries` is non-empty it is returned unchanged (same array instance).
 * Otherwise the transcript is split on whitespace and every token is given an
 * equal slice of the available time window, starting at `startSeconds`.
 *
 * @param entries - Word-level entries, if any were provided.
 * @param transcript - Full transcript text used as the fallback source.
 * @param options.channel - Channel index stamped on every synthesized entry.
 * @param options.startSeconds - Offset of the first token; defaults to 0 and
 *   is also treated as 0 when it is not a finite number.
 * @param options.durationSeconds - Total time to spread across the tokens.
 *   Ignored unless it is a finite number greater than zero, in which case a
 *   per-token estimate is used instead.
 * @returns The supplied entries, synthesized entries, or an empty array when
 *   there is neither word data nor transcript text.
 */
function wordEntriesFromTranscript(
  entries: WordEntry[] | null | undefined,
  transcript: string,
  {
    channel,
    startSeconds = 0,
    durationSeconds,
  }: {
    channel: number;
    startSeconds?: number;
    durationSeconds?: number;
  },
): WordEntry[] {
  // Fallback pacing used when no usable duration is known.
  const estimatedSecondsPerToken = 0.4;
  // Lower bound so that every synthesized word keeps a non-trivial span.
  const minimumSecondsPerToken = 0.05;

  if (entries?.length) {
    return entries;
  }

  // The static type says `string`, but the value originates from an external
  // response; treat anything else as "no transcript" instead of throwing.
  const text = typeof transcript === "string" ? transcript.trim() : "";
  if (!text) {
    return entries ?? [];
  }

  // `text` is trimmed and non-empty, so splitting on whitespace runs can never
  // produce empty tokens.
  const tokens = text.split(/\s+/);

  // Zero, negative, NaN and infinite durations are all treated as "unknown" so
  // they consistently fall back to the per-token estimate.
  const usableDuration =
    typeof durationSeconds === "number" &&
    Number.isFinite(durationSeconds) &&
    durationSeconds > 0
      ? durationSeconds
      : tokens.length * estimatedSecondsPerToken;
  const duration = Math.max(
    usableDuration,
    tokens.length * minimumSecondsPerToken,
  );

  // A non-finite offset would turn every timestamp into NaN.
  const offset = Number.isFinite(startSeconds) ? startSeconds : 0;

  return tokens.map((token, index) => ({
    word: token,
    punctuated_word: token,
    start: offset + (index / tokens.length) * duration,
    end: offset + ((index + 1) / tokens.length) * duration,
    channel,
    speaker: null,
  }));
}
506+
/**
 * Reads the `duration` field from a batch response's `metadata`.
 *
 * @param response - Batch response whose `metadata` is inspected.
 * @returns The duration when `metadata` is a non-array object carrying a
 *   finite, strictly positive numeric `duration`; otherwise `undefined`.
 */
function getBatchDurationSeconds(response: BatchResponse): number | undefined {
  const { metadata } = response;

  // Anything that is not a plain (non-null, non-array) object has no fields
  // to read from.
  if (typeof metadata !== "object" || metadata === null) {
    return undefined;
  }
  if (Array.isArray(metadata)) {
    return undefined;
  }

  const candidate = (metadata as Record<string, unknown>).duration;
  if (typeof candidate !== "number") {
    return undefined;
  }
  if (!Number.isFinite(candidate) || candidate <= 0) {
    return undefined;
  }

  return candidate;
}
0 commit comments