software-mansion · barhanc · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md
@@ -200,14 +200,18 @@ const result = await model.transcribe(audioBuffer, { verbose: true });
 
 ### Returns
 
-The hook returns an object with:
-
-- `transcribe(audio, options)`: One-shot transcription.
-- `stream(options)`: Async generator for streaming results.
-- `streamInsert(audio)`: Push audio to the stream buffer.
-- `streamStop()`: Finish the current stream.
-- `isGenerating`: Boolean indicating if the model is busy.
-- `loading`: Boolean indicating if the model is being loaded.
+The hook returns a [`SpeechToTextType`](../../06-api-reference/interfaces/SpeechToTextType.md) object containing:
+
+- `error`: `null | RnExecutorchError` - Contains the error if the model failed to load, otherwise `null`.
+- `isReady`: `boolean` - Indicates whether the model has successfully loaded and is ready for inference.
+- `isGenerating`: `boolean` - Indicates whether the model is currently processing an inference.
+- `downloadProgress`: `number` - Tracks the progress of the model download process as a value between `0` and `1`.
+- `transcribe(audio, options)`: Starts a transcription process for a given input array, which should be a waveform sampled at 16kHz. Returns a promise resolving to a [`TranscriptionResult`](../../06-api-reference/interfaces/TranscriptionResult.md).
+- `stream(options)`: Starts a streaming transcription process. Returns an asynchronous generator that yields objects containing `committed` and `nonCommitted` transcriptions, both of type [`TranscriptionResult`](../../06-api-reference/interfaces/TranscriptionResult.md).
+- `streamInsert(audio)`: Inserts a chunk of audio data (sampled at 16kHz) into the ongoing streaming transcription.
+- `streamStop()`: Stops the ongoing streaming transcription process.
+- `encode(audio)`: Runs the encoding part of the model on the provided waveform. Returns a promise resolving to the encoded `Float32Array`.
+- `decode(tokens, encoderOutput)`: Runs the decoder of the model with the given tokens (`Int32Array`) and encoder output (`Float32Array`). Returns a promise resolving to the decoded `Float32Array`.
 
 ## Supported models
 

diff --git a/docs/docs/03-hooks/01-natural-language-processing/useVAD.md b/docs/docs/03-hooks/01-natural-language-processing/useVAD.md
@@ -56,8 +56,10 @@ You can fine-tune the streaming behavior via the `options` object:
 
 ```tsx
 import { useVAD, models } from 'react-native-executorch';
+import { AudioRecorder } from 'react-native-audio-api';
 
 const model = useVAD({ model: models.vad.fsmn_vad() });
+const recorder = new AudioRecorder();
 
 const startLiveVAD = async () => {
   // Start the continuous streaming listener
@@ -70,21 +72,25 @@ const startLiveVAD = async () => {
     },
   });
 
-  // Example: Hook into your audio recorder's data event
-  audioRecorder.on('data', (chunk: Float32Array) => {
-    model.streamInsert(chunk);
-  });
+  // Capture microphone input at 16kHz
+  recorder.onAudioReady(
+    { sampleRate: 16000, bufferLength: 1600, channelCount: 1 },
+    (chunk) => model.streamInsert(chunk.buffer.getChannelData(0))
+  );
+
+  await recorder.start();
 };
 
 const stopLiveVAD = () => {
+  recorder.stop();
   model.streamStop();
 };
 ```
 
 ### Arguments & Returns
 
 - **Arguments**: `useVAD` takes a [`VADProps`](../../06-api-reference/interfaces/VADProps.md) object containing the `model` and an optional `preventLoad` flag.
-- **Returns**: A [`VADType`](../../06-api-reference/interfaces/VADType.md) object providing `forward`, `stream`, `streamInsert`, and `streamStop` methods, along with `isReady` and `error` states.
+- **Returns**: A [`VADType`](../../06-api-reference/interfaces/VADType.md) object providing `forward`, `stream`, `streamInsert`, and `streamStop` methods, along with `error`, `isReady`, `isGenerating`, and `downloadProgress` states.
 
 ## Supported models
 

diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/SpeechToTextModule.md
@@ -98,20 +98,20 @@ const model = await SpeechToTextModule.fromModelName(
 AudioManager.setAudioSessionOptions({
   iosCategory: 'playAndRecord',
   iosMode: 'spokenAudio',
-  iosOptions: ['allowBluetooth', 'defaultToSpeaker'],
+  iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'],
 });
 await AudioManager.requestRecordingPermissions();
 
 // 2. Setup Audio Recorder
-const recorder = new AudioRecorder({
-  sampleRate: 16000,
-  channelCount: 1,
-});
+const recorder = new AudioRecorder();
 
-recorder.onAudioReady((chunk) => {
-  // Feed chunks directly into the model's buffer
-  model.streamInsert(chunk.buffer.getChannelData(0));
-});
+recorder.onAudioReady(
+  { sampleRate: 16000, bufferLength: 1600, channelCount: 1 },
+  (chunk) => {
+    // Feed chunks directly into the model's buffer
+    model.streamInsert(chunk.buffer.getChannelData(0));
+  }
+);
 
 await recorder.start();
 

diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
@@ -100,8 +100,8 @@ const tts = await TextToSpeechModule.fromModelName(
 const audioContext = new AudioContext({ sampleRate: 24000 });
 
 try {
+  tts.streamInsert('This is a streaming test, with a sample input.');
   for await (const chunk of tts.stream({
-    text: 'This is a streaming test, with a sample input.',
     speed: 1.0,
   })) {
     // Play each chunk sequentially
@@ -135,8 +135,10 @@ const tts = await TextToSpeechModule.fromModelName(
 const waveform = await tts.forward('həlˈO wˈɜɹld!', 1.0, false);
 
 // Or stream from phonemes
+tts.streamInsert(
+  'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.'
+);
 for await (const chunk of tts.stream({
-  text: 'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.',
   speed: 1.0,
   phonemize: false,
 })) {