diff --git a/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md index 3256e2e88a..8876bf37eb 100644 --- a/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md +++ b/docs/docs/02-hooks/01-natural-language-processing/useSpeechToText.md @@ -78,7 +78,7 @@ For more information on loading resources, take a look at [loading models](../.. | Field | Type | Description | | --------------------------- | ---------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `transcribe` | `(waveform: Float32Array \| number[], options?: DecodingOptions \| undefined) => Promise` | Starts a transcription process for a given input array, which should be a waveform at 16kHz. The second argument is an options object, e.g. `{ language: 'es' }` for multilingual models. Resolves a promise with the output transcription when the model is finished. Passing `number[]` is deprecated. | -| `stream` | `() => Promise<string>` | Starts a streaming transcription process. Use in combination with `streamInsert` to feed audio chunks and `streamStop` to end the stream. Updates `committedTranscription` and `nonCommittedTranscription` as transcription progresses. | +| `stream` | `(options?: DecodingOptions \| undefined) => Promise<string>` | Starts a streaming transcription process. Use in combination with `streamInsert` to feed audio chunks and `streamStop` to end the stream. The optional argument is an options object, e.g. `{ language: 'es' }` for multilingual models. Updates `committedTranscription` and `nonCommittedTranscription` as transcription progresses. 
| | `streamInsert` | `(waveform: Float32Array \| number[]) => void` | Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription. Call this repeatedly as new audio data becomes available. Passing `number[]` is deprecated. | | `streamStop` | `() => void` | Stops the ongoing streaming transcription process. | | `encode` | `(waveform: Float32Array \| number[]) => Promise` | Runs the encoding part of the model on the provided waveform. Passing `number[]` is deprecated. | diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useSpeechToText.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useSpeechToText.ts index 74ef2c96de..1c974ec52c 100644 --- a/packages/react-native-executorch/src/hooks/natural_language_processing/useSpeechToText.ts +++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useSpeechToText.ts @@ -1,7 +1,7 @@ import { useEffect, useCallback, useState } from 'react'; import { ETError, getError } from '../../Error'; import { SpeechToTextModule } from '../../modules/natural_language_processing/SpeechToTextModule'; -import { SpeechToTextModelConfig } from '../../types/stt'; +import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt'; export const useSpeechToText = ({ model, @@ -65,24 +65,29 @@ export const useSpeechToText = ({ [isReady, isGenerating, modelInstance] ); - const stream = useCallback(async () => { - if (!isReady) throw new Error(getError(ETError.ModuleNotLoaded)); - if (isGenerating) throw new Error(getError(ETError.ModelGenerating)); - setIsGenerating(true); - setCommittedTranscription(''); - setNonCommittedTranscription(''); - let transcription = ''; - try { - for await (const { committed, nonCommitted } of modelInstance.stream()) { - setCommittedTranscription((prev) => prev + committed); - setNonCommittedTranscription(nonCommitted); - transcription += committed; + const stream = useCallback( + async (options?: 
DecodingOptions) => { + if (!isReady) throw new Error(getError(ETError.ModuleNotLoaded)); + if (isGenerating) throw new Error(getError(ETError.ModelGenerating)); + setIsGenerating(true); + setCommittedTranscription(''); + setNonCommittedTranscription(''); + let transcription = ''; + try { + for await (const { committed, nonCommitted } of modelInstance.stream( + options + )) { + setCommittedTranscription((prev) => prev + committed); + setNonCommittedTranscription(nonCommitted); + transcription += committed; + } + } finally { + setIsGenerating(false); } - } finally { - setIsGenerating(false); - } - return transcription; - }, [isReady, isGenerating, modelInstance]); + return transcription; + }, + [isReady, isGenerating, modelInstance] + ); const wrapper = useCallback( any>(fn: T) => {