@@ -32,7 +32,7 @@ export class SpeechToTextController {
3232 private streamWaveform : number [ ] = [ ] ;
3333 private isDecodingChunk = false ;
3434 private numberOfDecodedChunks = 0 ;
35- private numberOfDeletedChunks = 0 ;
35+ private isChunkDeleted = false ;
3636 private numOfChunks = 0 ;
3737
3838 // User callbacks
@@ -162,12 +162,12 @@ export class SpeechToTextController {
162162 }
163163 }
164164
165- private chunkWaveform ( waveform : number [ ] ) {
165+ private chunkWaveform ( waveform : number [ ] , streamingSlice ?: boolean ) {
166166 this . chunks = [ ] ;
167167 this . numOfChunks = Math . ceil ( waveform . length / this . windowSize ) ;
168168 for ( let i = 0 ; i < this . numOfChunks ; i ++ ) {
169169 let chunk ;
170- if ( i == 0 && this . numberOfDeletedChunks > 0 ) {
170+ if ( i == 0 && streamingSlice ) {
171171 chunk = waveform . slice (
172172 0 ,
173173 Math . min (
@@ -381,24 +381,21 @@ export class SpeechToTextController {
381381 this . streamWaveform = [ ] ;
382382 this . prevSeq = [ ] ;
383383 this . numberOfDecodedChunks = 0 ;
384+ this . isChunkDeleted = false ;
384385 this . decodedTranscribeCallback ( [ ] ) ;
385386 this . isGeneratingCallback ( true ) ;
386387 }
387388 this . streamWaveform = [ ...this . streamWaveform , ...waveform ] ;
388- this . chunkWaveform ( this . streamWaveform ) ;
389+ this . chunkWaveform ( this . streamWaveform , this . isChunkDeleted ) ;
389390 if ( ! this . isDecodingChunk && streamAction != 2 ) {
390391 this . isDecodingChunk = true ;
391392 while (
392- this . chunks . at ( - this . numOfChunks ) ?. length ==
393+ this . chunks . at ( 0 ) ?. length ==
393394 2 * this . overlapSeconds + this . windowSize ||
394395 ( this . numberOfDecodedChunks == 0 &&
395- this . chunks . at ( - this . numOfChunks ) ?. length ==
396- this . windowSize + this . overlapSeconds )
396+ this . chunks . at ( 0 ) ?. length == this . windowSize + this . overlapSeconds )
397397 ) {
398- let seq = await this . decodeChunk (
399- this . chunks . at ( - this . numOfChunks ) ! ,
400- audioLanguage
401- ) ;
398+ let seq = await this . decodeChunk ( this . chunks . at ( 0 ) ! , audioLanguage ) ;
402399 const numSpecialTokens = ( await this . getStartingTokenIds ( audioLanguage ) )
403400 . length ;
404401 // remove sos/eos token and 3 additional ones
@@ -418,28 +415,28 @@ export class SpeechToTextController {
418415 if ( this . seqs . length < 2 ) {
419416 continue ;
420417 }
418+ // drop the leading samples of this.streamWaveform that were already processed and saved to this.seqs
419+ if ( this . numOfChunks > 2 ) {
420+ if ( ! this . isChunkDeleted ) {
421+ this . streamWaveform = this . streamWaveform . slice (
422+ - (
423+ this . streamWaveform . length -
424+ ( this . windowSize + this . overlapSeconds )
425+ )
426+ ) ;
427+ } else {
428+ this . streamWaveform = this . streamWaveform . slice (
429+ - ( this . streamWaveform . length - this . windowSize )
430+ ) ;
431+ }
432+ this . isChunkDeleted = true ;
433+ this . numOfChunks -- ;
434+ }
421435 }
422436 this . isDecodingChunk = false ;
423437 }
424- // remove data from waveform, which was processed and saved to this.seqs
425- while ( this . numOfChunks > 2 ) {
426- if ( this . numberOfDeletedChunks == 0 ) {
427- this . streamWaveform = this . streamWaveform . slice (
428- - (
429- this . streamWaveform . length -
430- ( this . windowSize + this . overlapSeconds )
431- )
432- ) ;
433- } else {
434- this . streamWaveform = this . streamWaveform . slice (
435- - ( this . streamWaveform . length - this . windowSize )
436- ) ;
437- }
438- this . numberOfDeletedChunks ++ ;
439- this . numOfChunks -- ;
440- }
441438 while ( this . numOfChunks > 0 && streamAction == STREAMING_ACTION . STOP ) {
442- let seq = await this . decodeChunk ( this . chunks . at ( - this . numOfChunks ) ! ) ;
439+ let seq = await this . decodeChunk ( this . chunks . at ( 0 ) ! ) ;
443440 if ( this . numberOfDecodedChunks == 0 ) {
444441 this . sequence = seq ;
445442 this . decodedTranscribeCallback ( seq ) ;
@@ -465,7 +462,7 @@ export class SpeechToTextController {
465462
466463 private async tokenIdsToText ( tokenIds : number [ ] ) : Promise < string > {
467464 try {
468- return this . nativeTokenizer . decode ( tokenIds ) ;
465+ return this . nativeTokenizer . decode ( tokenIds , true ) ;
469466 } catch ( e ) {
470467 this . onErrorCallback ?.(
471468 new Error ( `An error has ocurred when decoding the token ids: ${ e } ` )
0 commit comments