diff --git a/apps/speech/App.tsx b/apps/speech/App.tsx
index ab036678e8..ddbfe3c983 100644
--- a/apps/speech/App.tsx
+++ b/apps/speech/App.tsx
@@ -5,6 +5,7 @@ import { SpeechToTextScreen } from './screens/SpeechToTextScreen';
 import ColorPalette from './colors';
 import ExecutorchLogo from './assets/executorch.svg';
 import { Quiz } from './screens/Quiz';
+import { TextToSpeechLLMScreen } from './screens/TextToSpeechLLMScreen';
 import { initExecutorch } from 'react-native-executorch';
 import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher';
 
@@ -14,7 +15,7 @@ initExecutorch({
 
 export default function App() {
   const [currentScreen, setCurrentScreen] = useState<
-    'menu' | 'speech-to-text' | 'text-to-speech' | 'quiz'
+    'menu' | 'speech-to-text' | 'text-to-speech' | 'quiz' | 'text-to-speech-llm'
   >('menu');
 
   const goToMenu = () => setCurrentScreen('menu');
@@ -31,6 +32,10 @@ export default function App() {
     return <Quiz onBack={goToMenu} />;
   }
 
+  if (currentScreen === 'text-to-speech-llm') {
+    return <TextToSpeechLLMScreen onBack={goToMenu} />;
+  }
+
   return (
     <View style={styles.container}>
       <ExecutorchLogo width={64} height={64} />
@@ -54,6 +59,12 @@ export default function App() {
         >
           <Text style={styles.buttonText}>Text to Speech - Quiz</Text>
         </TouchableOpacity>
+        <TouchableOpacity
+          style={styles.button}
+          onPress={() => setCurrentScreen('text-to-speech-llm')}
+        >
+          <Text style={styles.buttonText}>Text to Speech - LLM Streaming</Text>
+        </TouchableOpacity>
       </View>
     </View>
   );
diff --git a/apps/speech/screens/TextToSpeechLLMScreen.tsx b/apps/speech/screens/TextToSpeechLLMScreen.tsx
new file mode 100644
index 0000000000..04856cf3e7
--- /dev/null
+++ b/apps/speech/screens/TextToSpeechLLMScreen.tsx
@@ -0,0 +1,323 @@
+import React, { useEffect, useState, useRef } from 'react';
+import {
+  View,
+  Text,
+  StyleSheet,
+  TouchableOpacity,
+  ScrollView,
+} from 'react-native';
+import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
+import FontAwesome from '@expo/vector-icons/FontAwesome';
+import SWMIcon from '../assets/swm_icon.svg';
+import {
+  useLLM,
+  useTextToSpeech,
+  KOKORO_MEDIUM,
+  KOKORO_VOICE_AF_HEART,
+  LLAMA3_2_1B_QLORA,
+} from 'react-native-executorch';
+import {
+  AudioManager,
+  AudioContext,
+  AudioBuffer,
+  AudioBufferSourceNode,
+} from 'react-native-audio-api';
+
+interface TextToSpeechLLMProps {
+  onBack: () => void;
+}
+
+/**
+ * Wraps raw audio samples in a single-channel AudioBuffer for playback.
+ *
+ * @param audioVector - Samples produced by the model
+ * @param audioContext - Context used to allocate the buffer
+ * @param sampleRate - Sample rate of the buffer in Hz (defaults to 24000)
+ * @returns A mono AudioBuffer whose channel 0 is filled with audioVector
+ */
+const createAudioBufferFromVector = (
+  audioVector: Float32Array,
+  audioContext: AudioContext,
+  sampleRate: number = 24000
+): AudioBuffer => {
+  const sampleCount = audioVector.length;
+  const monoBuffer = audioContext.createBuffer(1, sampleCount, sampleRate);
+
+  // Channel 0 is the only channel of the buffer created above.
+  monoBuffer.getChannelData(0).set(audioVector);
+
+  return monoBuffer;
+};
+
+/** Demo screen: streams an LLM-generated story into TTS and plays it live. */
+export const TextToSpeechLLMScreen = ({ onBack }: TextToSpeechLLMProps) => {
+  const [displayText, setDisplayText] = useState('');
+  const [isTtsStreaming, setIsTtsStreaming] = useState(false);
+  const llm = useLLM({ model: LLAMA3_2_1B_QLORA });
+  const tts = useTextToSpeech({
+    model: KOKORO_MEDIUM,
+    voice: KOKORO_VOICE_AF_HEART,
+  });
+
+  // Length of the llm.response prefix already inserted into the TTS stream
+  const processedLengthRef = useRef(0);
+  const audioContextRef = useRef<AudioContext | null>(null);
+  // Last started playback node; handleStop() uses it to cut the audio off
+  const sourceRef = useRef<AudioBufferSourceNode | null>(null);
+
+  useEffect(() => {
+    AudioManager.setAudioSessionOptions({
+      iosCategory: 'playAndRecord',
+      iosMode: 'spokenAudio',
+      iosOptions: ['defaultToSpeaker'],
+    });
+
+    audioContextRef.current = new AudioContext({ sampleRate: 24000 });
+    audioContextRef.current.suspend();
+
+    return () => {
+      audioContextRef.current?.close();
+      audioContextRef.current = null;
+    };
+  }, []);
+
+  // Update displayText gradually as response gets generated and insert new text chunks into TTS stream
+  useEffect(() => {
+    if (llm.response && tts.isReady) {
+      setDisplayText(llm.response);
+
+      const previousLength = processedLengthRef.current;
+      if (llm.response.length > previousLength && isTtsStreaming) {
+        const newChunk = llm.response.slice(previousLength);
+        tts.streamInsert(newChunk);
+        processedLengthRef.current = llm.response.length;
+      }
+    } else {
+      processedLengthRef.current = 0;
+    }
+  }, [llm.response, tts, isTtsStreaming]);
+
+  const handleGenerate = async () => {
+    setDisplayText('');
+    processedLengthRef.current = 0;
+    setIsTtsStreaming(true);
+
+    const startTTS = async () => {
+      try {
+        const audioContext = audioContextRef.current;
+        if (!audioContext) return;
+
+        if (audioContext.state === 'suspended') {
+          await audioContext.resume();
+        }
+
+        // The returned promise resolves once playback of the chunk has ended
+        const onNext = (audioVec: Float32Array) =>
+          new Promise<void>((resolve) => {
+            const audioBuffer = createAudioBufferFromVector(
+              audioVec,
+              audioContext,
+              24000
+            );
+
+            const source = audioContext.createBufferSource();
+            sourceRef.current = source;
+            source.buffer = audioBuffer;
+            source.connect(audioContext.destination);
+
+            source.onEnded = () => resolve();
+
+            source.start();
+          });
+
+        await tts.stream({
+          speed: 0.9,
+          stopAutomatically: false,
+          onNext,
+        });
+      } catch (e) {
+        console.error('TTS streaming error:', e);
+      } finally {
+        setIsTtsStreaming(false);
+      }
+    };
+
+    const ttsPromise = startTTS();
+
+    try {
+      await llm.sendMessage(
+        'Generate a short story about a robot learning to paint. The story should be around 200 words long.'
+      );
+    } catch (e) {
+      console.error('Generation failed:', e);
+    } finally {
+      // Non-instant stop: request the end of the stream, then wait for it
+      tts.streamStop(false);
+      await ttsPromise;
+
+      const audioContext = audioContextRef.current;
+      if (audioContext?.state === 'running') {
+        await audioContext.suspend();
+      }
+    }
+  };
+
+  const handleStop = () => {
+    llm.interrupt();
+    tts.streamStop(true);
+    try {
+      sourceRef.current?.stop();
+    } catch {
+      // Source might have already stopped or disconnected
+    }
+  };
+
+  const isProcessing = llm.isGenerating || isTtsStreaming;
+  const isModelsReady = llm.isReady && tts.isReady;
+
+  const getModelStatus = () => {
+    if (llm.error) return `LLM Error: ${llm.error.message}`;
+    if (tts.error) return `TTS Error: ${tts.error.message}`;
+    if (!llm.isReady)
+      return `Loading LLM: ${(100 * llm.downloadProgress).toFixed(2)}%`;
+    if (!tts.isReady)
+      return `Loading TTS: ${(100 * tts.downloadProgress).toFixed(2)}%`;
+    if (isProcessing) return 'Generating/Streaming...';
+    return 'Ready';
+  };
+
+  return (
+    <SafeAreaProvider>
+      <SafeAreaView style={styles.container}>
+        <View style={styles.header}>
+          <TouchableOpacity style={styles.backButton} onPress={onBack}>
+            <FontAwesome name="chevron-left" size={20} color="#0f186e" />
+          </TouchableOpacity>
+          <SWMIcon width={60} height={60} />
+          <Text style={styles.headerText}>React Native ExecuTorch</Text>
+          <Text style={styles.headerText}>LLM to Speech Demo</Text>
+        </View>
+
+        <View style={styles.statusContainer}>
+          <Text>Status: {getModelStatus()}</Text>
+        </View>
+
+        <View style={styles.contentContainer}>
+          <Text style={styles.label}>Generated Story</Text>
+          <View style={styles.responseContainer}>
+            <ScrollView contentContainerStyle={styles.responseContent}>
+              <Text style={styles.responseText}>
+                {displayText ||
+                  (isModelsReady
+                    ? 'Press the button to generate a story and hear it spoken aloud.'
+                    : 'Please wait for models to load...')}
+              </Text>
+            </ScrollView>
+          </View>
+        </View>
+
+        <View style={styles.buttonContainer}>
+          {isProcessing ? (
+            <TouchableOpacity
+              style={[styles.actionButton, styles.stopButton]}
+              onPress={handleStop}
+            >
+              <FontAwesome name="stop" size={20} color="white" />
+              <Text style={styles.buttonText}>Stop Generation</Text>
+            </TouchableOpacity>
+          ) : (
+            <TouchableOpacity
+              disabled={!isModelsReady}
+              onPress={handleGenerate}
+              style={[styles.actionButton, !isModelsReady && styles.disabled]}
+            >
+              <FontAwesome name="magic" size={20} color="white" />
+              <Text style={styles.buttonText}>Generate & Stream Speech</Text>
+            </TouchableOpacity>
+          )}
+        </View>
+      </SafeAreaView>
+    </SafeAreaProvider>
+  );
+};
+
+const styles = StyleSheet.create({
+  container: {
+    flex: 1,
+    alignItems: 'center',
+    backgroundColor: 'white',
+    paddingHorizontal: 16,
+  },
+  header: {
+    alignItems: 'center',
+    position: 'relative',
+    width: '100%',
+  },
+  backButton: {
+    position: 'absolute',
+    left: 0,
+    top: 10,
+    padding: 10,
+    zIndex: 1,
+  },
+  headerText: {
+    fontSize: 22,
+    fontWeight: 'bold',
+    color: '#0f186e',
+  },
+  statusContainer: {
+    marginTop: 12,
+    alignItems: 'center',
+  },
+  contentContainer: {
+    width: '100%',
+    marginTop: 24,
+    flex: 1,
+    marginBottom: 24,
+  },
+  label: {
+    marginLeft: 12,
+    marginBottom: 4,
+    color: '#0f186e',
+    fontWeight: '600',
+  },
+  responseContainer: {
+    borderRadius: 12,
+    borderWidth: 1,
+    borderColor: '#0f186e',
+    flex: 1,
+  },
+  responseContent: {
+    padding: 12,
+  },
+  responseText: {
+    fontSize: 16,
+    color: '#333',
+    lineHeight: 24,
+  },
+  buttonContainer: {
+    marginBottom: 24,
+    width: '100%',
+  },
+  actionButton: {
+    backgroundColor: '#0f186e',
+    flexDirection: 'row',
+    justifyContent: 'center',
+    alignItems: 'center',
+    padding: 12,
+    borderRadius: 12,
+    gap: 8,
+  },
+  stopButton: {
+    backgroundColor: '#ff4444',
+  },
+  buttonText: {
+    color: 'white',
+    fontWeight: '600',
+    letterSpacing: -0.5,
+    fontSize: 16,
+  },
+  disabled: {
+    opacity: 0.5,
+  },
+});
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md b/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
index 10e9986dee..af0fe8e18c 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
@@ -87,7 +87,8 @@ The module provides two ways to generate speech using either raw text or pre-gen
 ### Using Text
 
 1.  [**`forward({ text, speed })`**](../../06-api-reference/interfaces/TextToSpeechType.md#forward): Generates the complete audio waveform at once. Returns a promise resolving to a `Float32Array`.
-2.  [**`stream({ text, speed, onNext, ... })`**](../../06-api-reference/interfaces/TextToSpeechType.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences.
+2.  [**`stream({ speed, stopAutomatically, onNext, ... })`**](../../06-api-reference/interfaces/TextToSpeechType.md#stream): Streams the audio asynchronously: instead of returning the whole waveform, it delivers each chunk through the `onNext` callback as soon as it is computed.
+    This is ideal for reducing the "time to first audio" for long sentences. You can also insert text dynamically while generation is running using `streamInsert(text)`, and stop it with `streamStop(instant)`.
 
 ### Using Phonemes
 
diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
index 00fd04f53b..ec0919574c 100644
--- a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
+++ b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
@@ -52,14 +52,14 @@ The module provides two ways to generate speech using either raw text or pre-gen
 ### Using Text
 
 1.  [**`forward(text, speed)`**](../../06-api-reference/classes/TextToSpeechModule.md#forward): Generates the complete audio waveform at once. Returns a promise resolving to a `Float32Array`.
-2.  [**`stream({ text, speed })`**](../../06-api-reference/classes/TextToSpeechModule.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences.
+2.  [**`stream({ speed, stopAutomatically, onNext, ... })`**](../../06-api-reference/classes/TextToSpeechModule.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences. In contrast to `forward`, it allows inserting text chunks dynamically into the processing buffer with [**`streamInsert(text)`**](../../06-api-reference/classes/TextToSpeechModule.md#streaminsert) and stopping the generation early with [**`streamStop(instant)`**](../../06-api-reference/classes/TextToSpeechModule.md#streamstop).
 
 ### Using Phonemes
 
 If you have pre-computed phonemes (e.g., from an external dictionary or a custom G2P model), you can skip the internal phoneme generation step:
 
 1.  [**`forwardFromPhonemes(phonemes, speed)`**](../../06-api-reference/classes/TextToSpeechModule.md#forwardfromphonemes): Generates the complete audio waveform from a phoneme string.
-2.  [**`streamFromPhonemes({ phonemes, speed })`**](../../06-api-reference/classes/TextToSpeechModule.md#streamfromphonemes): Streams audio chunks generated from a phoneme string.
+2.  [**`streamFromPhonemes({ phonemes, speed, onNext, ... })`**](../../06-api-reference/classes/TextToSpeechModule.md#streamfromphonemes): Streams audio chunks generated from a phoneme string.
 
 :::note
 Since `forward` and `forwardFromPhonemes` process the entire input at once, they might take a significant amount of time to produce audio for long inputs.
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
index 90241eac03..c13b8991dc 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
@@ -188,17 +188,19 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
                                        promiseHostFunction<&Model::stream>,
                                        "stream"));
       addFunctions(JSI_EXPORT_FUNCTION(
-          ModelHostObject<Model>,
-          synchronousHostFunction<&Model::streamStop>,
+          ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
           "streamStop"));
       addFunctions(JSI_EXPORT_FUNCTION(
-          ModelHostObject<Model>,
-          promiseHostFunction<&Model::generateFromPhonemes>,
-          "generateFromPhonemes"));
-      addFunctions(JSI_EXPORT_FUNCTION(
-          ModelHostObject<Model>,
-          promiseHostFunction<&Model::streamFromPhonemes>,
-          "streamFromPhonemes"));
+          ModelHostObject<Model>, synchronousHostFunction<&Model::streamInsert>,
+          "streamInsert"));
+      addFunctions(
+          JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                              promiseHostFunction<&Model::generateFromPhonemes>,
+                              "generateFromPhonemes"));
+      addFunctions(
+          JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                              promiseHostFunction<&Model::streamFromPhonemes>,
+                              "streamFromPhonemes"));
     }
 
     if constexpr (meta::HasGenerateFromString<Model>) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Constants.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Constants.h
index 3bc7f7f835..6064191443 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Constants.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Constants.h
@@ -34,6 +34,10 @@ inline constexpr int32_t kSamplingRate =
     24000; // Corresponds to Kokoro's model audio frequency
 inline constexpr int32_t kSamplesPerMilisecond = kSamplingRate / 1000;
 
+// Special text characters
+inline const std::unordered_set<char> kEndOfSentenceCharacters = {'.', '?', '!',
+                                                                  ';'};
+
 // Special phonemes
 inline const std::unordered_set<char32_t> kEndOfSentencePhonemes = {
     U'.', U'?', U'!', U';', U'…'};
@@ -70,4 +74,4 @@ inline const std::unordered_map<char32_t, Token> kVocab = {
 inline constexpr Token kInvalidToken = -1;
 inline constexpr Token kPadToken = 0;
 
-} // namespace rnexecutorch::models::text_to_speech::kokoro::constants
+} // namespace rnexecutorch::models::text_to_speech::kokoro::constants
\ No newline at end of file
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
index d7da13b945..ea43f09d47 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
@@ -7,6 +7,7 @@
 #include <phonemis/utilities/string_utils.h>
 #include <rnexecutorch/Error.h>
 #include <rnexecutorch/data_processing/Sequential.h>
+#include <thread>
 
 namespace rnexecutorch::models::text_to_speech::kokoro {
 
@@ -103,9 +104,8 @@ Kokoro::generateFromPhonemesImpl(const std::u32string &phonemes, float speed) {
   return audio;
 }
 
-void Kokoro::streamFromPhonemesImpl(
-    const std::u32string &phonemes, float speed,
-    std::shared_ptr<jsi::Function> callback) {
+void Kokoro::streamFromPhonemesImpl(const std::u32string &phonemes, float speed,
+                                    std::shared_ptr<jsi::Function> callback) {
   auto nativeCallback = [this, callback](const std::vector<float> &audioVec) {
     if (this->isStreaming_) {
       this->callInvoker_->invokeAsync(
@@ -116,8 +116,6 @@ void Kokoro::streamFromPhonemesImpl(
     }
   };
 
-  isStreaming_ = true;
-
   // Use LATENCY strategy to minimize the time-to-first-audio for streaming
   auto subsentences =
       partitioner_.divide<Partitioner::Strategy::LATENCY>(phonemes);
@@ -152,8 +150,6 @@ void Kokoro::streamFromPhonemesImpl(
     // Push the audio right away to the JS side
     nativeCallback(std::move(audioPart));
   }
-
-  isStreaming_ = false;
 }
 
 std::vector<float> Kokoro::generate(std::string text, float speed) {
@@ -162,6 +158,10 @@ std::vector<float> Kokoro::generate(std::string text, float speed) {
                             "Kokoro: maximum input text size exceeded");
   }
 
+  if (text.empty()) {
+    return {};
+  }
+
   // G2P (Grapheme to Phoneme) conversion
   auto phonemes = phonemizer_.process(text);
 
@@ -171,24 +171,74 @@ std::vector<float> Kokoro::generate(std::string text, float speed) {
 std::vector<float> Kokoro::generateFromPhonemes(std::string phonemes,
                                                 float speed) {
   if (phonemes.empty()) {
-    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
-                            "Kokoro: phoneme string must not be empty");
+    return {};
   }
+
   return generateFromPhonemesImpl(
       phonemis::utilities::string_utils::utf8_to_u32string(phonemes), speed);
 }
 
-void Kokoro::stream(std::string text, float speed,
+void Kokoro::stream(float speed, bool stopOnEmptyBuffer,
                     std::shared_ptr<jsi::Function> callback) {
-  if (text.size() > params::kMaxTextSize) {
-    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
-                            "Kokoro: maximum input text size exceeded");
-  }
+  isStreaming_ = true;
+  stopOnEmptyBuffer_ = stopOnEmptyBuffer;
+
+  // The outer loop drains the input buffer chunk by chunk; every extracted
+  // chunk is then streamed like a regular, fixed piece of input text.
+  while (isStreaming_) {
+    std::string text;
+    // The input buffer is only touched inside this mutex-protected scope.
+    {
+      std::scoped_lock<std::mutex> lock(inputTextBufferMutex_);
+      if (inputTextBuffer_.empty() && stopOnEmptyBuffer_) {
+        break;
+      }
+
+      // Try to find the most recent available end of sentence character.
+      size_t searchLimit =
+          std::min(inputTextBuffer_.size(), params::kMaxTextSize);
+      auto eosIt = std::find_first_of(
+          inputTextBuffer_.rbegin() + (inputTextBuffer_.size() - searchLimit),
+          inputTextBuffer_.rend(), constants::kEndOfSentenceCharacters.begin(),
+          constants::kEndOfSentenceCharacters.end());
+      size_t chunkSize = (eosIt != inputTextBuffer_.rend())
+                             ? std::distance(eosIt, inputTextBuffer_.rend())
+                             : 0;
+
+      // For the best speech quality we prefer chunks ending with a sentence.
+      // Pending text without a sentence end is flushed as-is only after it
+      // has been skipped kStreamMaxSkippedIterations times in a row.
+      if (chunkSize == 0 && searchLimit > 0 &&
+          streamSkippedIterations >= params::kStreamMaxSkippedIterations) {
+        chunkSize = searchLimit;
+      }
+      if (chunkSize > 0) {
+        text = inputTextBuffer_.substr(0, chunkSize);
+        inputTextBuffer_.erase(0, chunkSize);
+        streamSkippedIterations = 0;
+      } else if (searchLimit > 0) {
+        streamSkippedIterations++;
+      }
+    }
 
-  // G2P (Grapheme to Phoneme) conversion
-  auto phonemes = phonemizer_.process(text);
+    if (!text.empty()) {
+      // Now we proceed with a standard streaming logic for fixed-size input.
+      auto phonemes = phonemizer_.process(text);
+      streamFromPhonemesImpl(phonemes, speed, callback);
+    }
 
-  streamFromPhonemesImpl(phonemes, speed, callback);
+    // A little bit of pause to not overload the thread.
+    if (isStreaming_) {
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(params::kStreamPause));
+    }
+  }
+
+  // Leave a clean state behind for the next streaming session.
+  std::scoped_lock<std::mutex> lock(inputTextBufferMutex_);
+  inputTextBuffer_.clear();
+  isStreaming_ = false;
+  streamSkippedIterations = 0;
 }
 
 void Kokoro::streamFromPhonemes(std::string phonemes, float speed,
@@ -197,12 +247,26 @@ void Kokoro::streamFromPhonemes(std::string phonemes, float speed,
     throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                             "Kokoro: phoneme string must not be empty");
   }
+
+  isStreaming_ = true;
   streamFromPhonemesImpl(
       phonemis::utilities::string_utils::utf8_to_u32string(phonemes), speed,
       callback);
+  isStreaming_ = false;
 }
 
-void Kokoro::streamStop() noexcept { isStreaming_ = false; }
+void Kokoro::streamInsert(std::string textChunk) noexcept {
+  std::scoped_lock<std::mutex> lock(inputTextBufferMutex_);
+  inputTextBuffer_.append(textChunk);
+}
+
+void Kokoro::streamStop(bool instant) noexcept {
+  if (instant) {
+    isStreaming_ = false;
+  } else {
+    stopOnEmptyBuffer_ = true;
+  }
+}
 
 std::vector<float> Kokoro::synthesize(const std::u32string &phonemes,
                                       float speed, size_t paddingMs) {
@@ -220,8 +284,8 @@ std::vector<float> Kokoro::synthesize(const std::u32string &phonemes,
   const auto tokens = utils::tokenize(phonemes, {noTokens});
 
   // Select the appropriate voice vector
-  size_t voiceID = std::min({phonemes.size() - 1, noTokens - 1,
-                             voice_.size() - 1});
+  size_t voiceID =
+      std::min({phonemes.size() - 1, noTokens - 1, voice_.size() - 1});
   auto &voice = voice_[voiceID];
 
   // Initialize text mask
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
index 47fdab769b..e33631af61 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
@@ -25,27 +25,46 @@ class Kokoro {
          const std::string &synthesizerSource, const std::string &voiceSource,
          std::shared_ptr<react::CallInvoker> callInvoker);
 
-  // Processes the entire text at once, before sending back to the JS side.
+  /**
+   * Processes the entire text at once, before sending back to the JS side.
+   */
   std::vector<float> generate(std::string text, float speed = 1.F);
 
-  // Accepts pre-computed phonemes (as a UTF-8 IPA string) and synthesizes
-  // audio, bypassing the built-in phonemizer. This allows callers to use
-  // an external G2P system (e.g. the Python `phonemizer` library, espeak-ng,
-  // or any custom phonemizer).
+  /**
+   * Similar to generate(), but accepts pre-computed phonemes (as a UTF-8 IPA
+   * string) and synthesizes audio, bypassing the built-in phonemizer.
+   */
   std::vector<float> generateFromPhonemes(std::string phonemes,
                                           float speed = 1.F);
 
-  // Processes text in chunks, sending each chunk individualy to the JS side
-  // with asynchronous callbacks.
-  void stream(std::string text, float speed,
+  /**
+   * Processes text from inputTextBuffer_ in chunks, sending each chunk
+   * individually to the JS side with asynchronous callbacks.
+   *
+   * The input can be expanded while streaming by calling streamInsert().
+   */
+  void stream(float speed, bool stopOnEmptyBuffer,
               std::shared_ptr<jsi::Function> callback);
 
   // Streaming variant that accepts pre-computed phonemes instead of text.
   void streamFromPhonemes(std::string phonemes, float speed,
                           std::shared_ptr<jsi::Function> callback);
 
-  // Stops the streaming process
-  void streamStop() noexcept;
+  /**
+   * Updates the input streaming buffer by adding more text to be processed.
+   *
+   * @param textChunk New text, appended to the end of the input buffer.
+   */
+  void streamInsert(std::string textChunk) noexcept;
+
+  /**
+   * Stops the streaming process.
+   *
+   * @param instant If true, stops the streaming as soon as possible by
+   * switching the isStreaming_ flag off. Otherwise switches the
+   * stopOnEmptyBuffer_ flag on, so the rest of the buffer is processed first.
+   */
+  void streamStop(bool instant) noexcept;
 
   std::size_t getMemoryLowerBound() const noexcept;
   void unload() noexcept;
@@ -80,8 +99,12 @@ class Kokoro {
   // Each row is a style vector for a given input token count.
   std::vector<std::array<float, constants::kVoiceRefSize>> voice_;
 
-  // Extra control variables
+  // Streaming state control variables
+  std::string inputTextBuffer_;
+  mutable std::mutex inputTextBufferMutex_;
   std::atomic<bool> isStreaming_{false};
+  std::atomic<bool> stopOnEmptyBuffer_{true};
+  int32_t streamSkippedIterations = 0;
 };
 } // namespace models::text_to_speech::kokoro
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Params.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Params.h
index f6b910b03f..f517db0318 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Params.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Params.h
@@ -20,6 +20,17 @@ namespace rnexecutorch::models::text_to_speech::kokoro::params {
  */
 inline constexpr size_t kMaxTextSize = 2048;
 
+/**
+ * The number of skipped streaming iterations after which the remaining input
+ * is processed no matter what it looks like.
+ */
+inline constexpr int32_t kStreamMaxSkippedIterations = 3;
+
+/**
+ * Pause (in milliseconds) applied after each streaming iteration.
+ */
+inline constexpr int32_t kStreamPause = 200;
+
 /**
  * A set of punctation - pause values. Determines how much pause (silence) is
  * being added at the end of each calculated audio vector. This is primarly used
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
index 52ed60685f..426aafc1f3 100644
--- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
@@ -84,6 +84,12 @@ target_link_options(opencv_deps INTERFACE -fopenmp -static-openmp)
 add_library(tokenizers_deps INTERFACE)
 target_include_directories(tokenizers_deps INTERFACE "${TOKENIZERS_DIR}")
 
+# Phonemis (prebuilt static library, linked by the text-to-speech tests)
+add_library(phonemis STATIC IMPORTED)
+set_target_properties(phonemis PROPERTIES
+    IMPORTED_LOCATION "${ANDROID_THIRD_PARTY}/phonemis/${ANDROID_ABI}/libphonemis.a"
+)
+
 # Source Definitions
 set(CORE_SOURCES
     ${RNEXECUTORCH_DIR}/models/BaseModel.cpp
@@ -218,6 +224,16 @@ add_rn_test(SpeechToTextTests integration/SpeechToTextTest.cpp
     LIBS tokenizers_deps z
 )
 
+add_rn_test(TextToSpeechTests integration/TextToSpeechTest.cpp
+    SOURCES
+        ${RNEXECUTORCH_DIR}/models/text_to_speech/kokoro/Kokoro.cpp
+        ${RNEXECUTORCH_DIR}/models/text_to_speech/kokoro/DurationPredictor.cpp
+        ${RNEXECUTORCH_DIR}/models/text_to_speech/kokoro/Synthesizer.cpp
+        ${RNEXECUTORCH_DIR}/models/text_to_speech/kokoro/Partitioner.cpp
+        ${RNEXECUTORCH_DIR}/models/text_to_speech/kokoro/Utils.cpp
+    LIBS phonemis
+)
+
 add_rn_test(LLMTests integration/LLMTest.cpp
     SOURCES
         ${RNEXECUTORCH_DIR}/models/llm/LLM.cpp
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextToSpeechTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextToSpeechTest.cpp
new file mode 100644
index 0000000000..017c9c3695
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextToSpeechTest.cpp
@@ -0,0 +1,123 @@
+#include "BaseModelTests.h"
+#include "utils/TestUtils.h"
+#include <algorithm>
+#include <gtest/gtest.h>
+#include <rnexecutorch/Error.h>
+#include <rnexecutorch/models/text_to_speech/kokoro/Kokoro.h>
+
+using namespace rnexecutorch;
+using namespace rnexecutorch::models::text_to_speech::kokoro;
+
+constexpr auto kValidLang = "en-us";
+constexpr auto kValidTaggerPath = "kokoro_en_tagger.json";
+constexpr auto kValidPhonemizerPath = "kokoro_us_lexicon.json";
+constexpr auto kValidDurationPath = "kokoro_duration_predictor.pte";
+constexpr auto kValidSynthesizerPath = "kokoro_synthesizer.pte";
+constexpr auto kValidVoicePath = "kokoro_af_heart.bin";
+
+namespace {
+bool isAudioValid(const std::vector<float> &audio) {
+  if (audio.empty()) {
+    return false;
+  }
+  // Check for non-silence (amplitude greater than an arbitrary small noise
+  // threshold)
+  return std::ranges::any_of(
+      audio, [](float sample) { return std::abs(sample) > 1e-4f; });
+}
+
+bool isAudioSimilar(const std::vector<float> &audio1,
+                    const std::vector<float> &audio2, float tolerance = 0.1f) {
+  if (audio1.empty() || audio2.empty()) {
+    return false;
+  }
+
+  double sumSqDiff = 0.0;
+  size_t steps = std::max(audio1.size(), audio2.size());
+
+  for (size_t i = 0; i < steps; ++i) {
+    size_t idx1 =
+        static_cast<size_t>((static_cast<float>(i) / steps) * audio1.size());
+    size_t idx2 =
+        static_cast<size_t>((static_cast<float>(i) / steps) * audio2.size());
+
+    float diff = audio1[idx1] - audio2[idx2];
+    sumSqDiff += diff * diff;
+  }
+
+  double rmse = std::sqrt(sumSqDiff / steps);
+  if (rmse >= tolerance) {
+    std::cerr << "Audio structural RMSE difference: " << rmse
+              << " (tolerance: " << tolerance << ")" << std::endl;
+    return false;
+  }
+  return true;
+}
+
+class KokoroTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    try {
+      model_ = std::make_unique<Kokoro>(
+          kValidLang, kValidTaggerPath, kValidPhonemizerPath,
+          kValidDurationPath, kValidSynthesizerPath, kValidVoicePath, nullptr);
+    } catch (...) {
+      model_ = nullptr;
+    }
+  }
+
+  std::unique_ptr<Kokoro> model_;
+};
+} // namespace
+
+TEST(TTSCtorTests, InvalidVoicePathThrows) {
+  EXPECT_THROW(Kokoro(kValidLang, kValidTaggerPath, kValidPhonemizerPath,
+                      kValidDurationPath, kValidSynthesizerPath,
+                      "nonexistent_voice.bin", nullptr),
+               RnExecutorchError);
+}
+
+TEST_F(KokoroTest, MaxTextSizeExceededThrows) {
+  if (!model_) {
+    GTEST_SKIP() << "Model assets not available, skipping test.";
+  }
+  std::string hugeText(10000, 'A'); // beyond params::kMaxTextSize
+  EXPECT_THROW(model_->generate(hugeText, 1.0f), RnExecutorchError);
+}
+
+TEST_F(KokoroTest, EmptyStringReturnsEmptyVector) {
+  if (!model_) {
+    GTEST_SKIP() << "Model assets not available, skipping test.";
+  }
+  auto result = model_->generate("", 1.0f);
+  EXPECT_TRUE(result.empty());
+}
+
+TEST_F(KokoroTest, GenerateReturnsValidAudio) {
+  if (!model_) {
+    GTEST_SKIP() << "Model assets not available, skipping test.";
+  }
+  auto result = model_->generate("Hello world! How are you doing?", 1.0f);
+  auto reference = test_utils::loadAudioFromFile("test_speech.raw");
+
+  ASSERT_FALSE(reference.empty())
+      << "Reference audio 'test_speech.raw' not found.";
+
+  // Compare against an audio waveform obtained from the original
+  // Kokoro model (PyTorch)
+  EXPECT_TRUE(isAudioSimilar(result, reference));
+}
+
+TEST_F(KokoroTest, GenerateSpeedAdjustsAudioLength) {
+  if (!model_) {
+    GTEST_SKIP() << "Model assets not available, skipping test.";
+  }
+  std::string text = "This is a sentence to test the speed modifications.";
+  auto resultNormal = model_->generate(text, 1.0f);
+  auto resultFast = model_->generate(text, 1.5f);
+
+  EXPECT_TRUE(isAudioValid(resultNormal));
+  EXPECT_TRUE(isAudioValid(resultFast));
+  // Fast speech should result in a noticeably shorter output waveform
+  EXPECT_LT(resultFast.size(), resultNormal.size());
+}
\ No newline at end of file
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/assets/test_speech.raw b/packages/react-native-executorch/common/rnexecutorch/tests/integration/assets/test_speech.raw
new file mode 100644
index 0000000000..2cf55af04f
Binary files /dev/null and b/packages/react-native-executorch/common/rnexecutorch/tests/integration/assets/test_speech.raw differ
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
index c46fde9fa9..d12f8fbada 100755
--- a/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/run_tests.sh
@@ -29,6 +29,7 @@ TEST_EXECUTABLES=(
   "VADTests"
   "TokenizerModuleTests"
   "SpeechToTextTests"
+  "TextToSpeechTests"
   "LLMTests"
   "ImageSegmentationTests"
   "TextToImageTests"
@@ -41,6 +42,7 @@ TEST_EXECUTABLES=(
 # ============================================================================
 TEST_ASSETS=(
   "integration/assets/test_audio_float.raw"
+  "integration/assets/test_speech.raw"
   "integration/assets/we_are_software_mansion.jpg"
 )
 
@@ -58,6 +60,11 @@ MODELS=(
   "fsmn-vad_xnnpack.pte|https://huggingface.co/software-mansion/react-native-executorch-fsmn-vad/resolve/main/xnnpack/fsmn-vad_xnnpack.pte"
   "whisper_tiny_en_xnnpack.pte|https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.8.0/xnnpack/whisper_tiny_en_xnnpack.pte"
   "whisper_tokenizer.json|https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.8.0/tokenizer.json"
+  "kokoro_duration_predictor.pte|https://huggingface.co/software-mansion/react-native-executorch-kokoro/resolve/main/xnnpack/medium/duration_predictor.pte"
+  "kokoro_synthesizer.pte|https://huggingface.co/software-mansion/react-native-executorch-kokoro/resolve/main/xnnpack/medium/synthesizer.pte"
+  "kokoro_af_heart.bin|https://huggingface.co/software-mansion/react-native-executorch-kokoro/resolve/main/voices/af_heart.bin"
+  "kokoro_us_lexicon.json|https://huggingface.co/software-mansion/react-native-executorch-kokoro/resolve/main/phonemizer/us_merged.json"
+  "kokoro_en_tagger.json|https://huggingface.co/software-mansion/react-native-executorch-kokoro/resolve/main/phonemizer/tags.json"
   "smolLm2_135M_8da4w.pte|https://huggingface.co/software-mansion/react-native-executorch-smolLm-2/resolve/v0.6.0/smolLm-2-135M/quantized/smolLm2_135M_8da4w.pte"
   "smollm_tokenizer.json|https://huggingface.co/software-mansion/react-native-executorch-smolLm-2/resolve/v0.6.0/tokenizer.json"
   "deeplabV3_xnnpack_fp32.pte|https://huggingface.co/software-mansion/react-native-executorch-deeplab-v3/resolve/v0.6.0/xnnpack/deeplabV3_xnnpack_fp32.pte"
diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
index 1d48aef34e..b5e03ceb59 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
@@ -5,7 +5,6 @@ import {
   TextToSpeechInput,
   TextToSpeechPhonemeInput,
   TextToSpeechType,
-  TextToSpeechStreamingCallbacks,
   TextToSpeechStreamingInput,
   TextToSpeechStreamingPhonemeInput,
 } from '../../types/tts';
@@ -91,36 +90,11 @@ export const useTextToSpeech = ({
     [isReady, isGenerating, moduleInstance]
   );
 
-  // Shared streaming orchestration (guards + onBegin/onNext/onEnd lifecycle)
-  const runStream = useCallback(
-    async (
-      methodName: string,
-      generator: AsyncGenerator<Float32Array>,
-      callbacks: TextToSpeechStreamingCallbacks
-    ) => {
-      guardReady(methodName);
-      setIsGenerating(true);
-      try {
-        await callbacks.onBegin?.();
-        for await (const audio of generator) {
-          if (callbacks.onNext) {
-            await callbacks.onNext(audio);
-          }
-        }
-      } finally {
-        await callbacks.onEnd?.();
-        setIsGenerating(false);
-      }
-    },
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [isReady, isGenerating, moduleInstance]
-  );
-
   const forward = async (input: TextToSpeechInput) => {
     const instance = guardReady('forward');
     try {
       setIsGenerating(true);
-      return await instance.forward(input.text, input.speed ?? 1.0);
+      return await instance.forward(input.text ?? '', input.speed ?? 1.0);
     } finally {
       setIsGenerating(false);
     }
@@ -131,7 +105,7 @@ export const useTextToSpeech = ({
     try {
       setIsGenerating(true);
       return await instance.forwardFromPhonemes(
-        input.phonemes,
+        input.phonemes ?? '',
         input.speed ?? 1.0
       );
     } finally {
@@ -142,28 +116,67 @@ export const useTextToSpeech = ({
   const stream = useCallback(
     async (input: TextToSpeechStreamingInput) => {
       const instance = guardReady('stream');
-      await runStream(
-        'stream',
-        instance.stream({ text: input.text, speed: input.speed ?? 1.0 }),
-        input
-      );
+      setIsGenerating(true);
+      try {
+        if (input.text) {
+          instance.streamInsert(input.text);
+        }
+
+        await input.onBegin?.();
+        for await (const audio of instance.stream({
+          speed: input.speed ?? 1.0,
+          stopAutomatically: input.stopAutomatically ?? true,
+        })) {
+          if (input.onNext) {
+            await input.onNext(audio);
+          }
+        }
+      } finally {
+        await input.onEnd?.();
+        setIsGenerating(false);
+      }
     },
-    [guardReady, runStream]
+    [guardReady]
   );
 
   const streamFromPhonemes = useCallback(
     async (input: TextToSpeechStreamingPhonemeInput) => {
       const instance = guardReady('streamFromPhonemes');
-      await runStream(
-        'streamFromPhonemes',
-        instance.streamFromPhonemes({
-          phonemes: input.phonemes,
+      setIsGenerating(true);
+      try {
+        await input.onBegin?.();
+        for await (const audio of instance.streamFromPhonemes({
+          phonemes: input.phonemes ?? '',
           speed: input.speed ?? 1.0,
-        }),
-        input
-      );
+        })) {
+          if (input.onNext) {
+            await input.onNext(audio);
+          }
+        }
+      } finally {
+        await input.onEnd?.();
+        setIsGenerating(false);
+      }
+    },
+    [guardReady]
+  );
+
+  const streamInsert = useCallback(
+    (text: string) => {
+      if (moduleInstance) {
+        moduleInstance.streamInsert(text);
+      }
+    },
+    [moduleInstance]
+  );
+
+  const streamStop = useCallback(
+    (instant: boolean = true) => {
+      if (moduleInstance) {
+        moduleInstance.streamStop(instant);
+      }
     },
-    [guardReady, runStream]
+    [moduleInstance]
   );
 
   return {
@@ -174,7 +187,8 @@ export const useTextToSpeech = ({
     forwardFromPhonemes,
     stream,
     streamFromPhonemes,
-    streamStop: () => moduleInstance?.streamStop(),
+    streamInsert,
+    streamStop,
     downloadProgress,
   };
 };
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
index a12285057b..fb27dd29ba 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
@@ -17,6 +17,7 @@ import { Logger } from '../../common/Logger';
  */
 export class TextToSpeechModule {
   private nativeModule: any;
+  private isStreaming: boolean = false;
 
   private constructor(nativeModule: unknown) {
     this.nativeModule = nativeModule;
@@ -143,17 +144,23 @@ export class TextToSpeechModule {
   }
 
   /**
-   * Shared streaming implementation. Wraps a native streaming call in an
-   * async generator that yields Float32Array audio chunks as they arrive.
+   * Starts a streaming synthesis session. Yields audio chunks as they are generated.
+   *
+   * @param input - Input object with optional speed and stopAutomatically; text is supplied via streamInsert().
+   * @returns An async generator yielding Float32Array audio chunks.
    */
-  private async *streamImpl(
-    nativeCall: (cb: (audio: number[]) => void) => Promise<void>
-  ): AsyncGenerator<Float32Array> {
+  public async *stream({
+    speed,
+    stopAutomatically,
+  }: TextToSpeechStreamingInput): AsyncGenerator<Float32Array> {
+    // Stores computed audio segments
     const queue: Float32Array[] = [];
 
     let waiter: (() => void) | null = null;
-    let finished = false;
     let error: unknown;
+    let nativeStreamFinished = false;
+
+    this.isStreaming = true;
 
     const wake = () => {
       waiter?.();
@@ -162,46 +169,36 @@ export class TextToSpeechModule {
 
     (async () => {
       try {
-        await nativeCall((audio: number[]) => {
-          queue.push(new Float32Array(audio));
-          wake();
-        });
-        finished = true;
+        await this.nativeModule.stream(
+          speed,
+          stopAutomatically,
+          (audio: number[]) => {
+            queue.push(new Float32Array(audio));
+            wake();
+          }
+        );
+        nativeStreamFinished = true;
         wake();
       } catch (e) {
         error = e;
-        finished = true;
+        nativeStreamFinished = true;
         wake();
       }
     })();
 
-    while (true) {
+    while (this.isStreaming) {
       if (queue.length > 0) {
         yield queue.shift()!;
-        if (finished && queue.length === 0) {
-          return;
-        }
         continue;
       }
       if (error) throw error;
-      if (finished) return;
+      // The native call has settled and every queued chunk was yielded, so no
+      // further wake() is expected: end the generator instead of waiting forever.
+      if (nativeStreamFinished) return;
       await new Promise<void>((r) => (waiter = r));
     }
   }
 
-  /**
-   * Starts a streaming synthesis session. Yields audio chunks as they are generated.
-   *
-   * @param input - Input object containing text and optional speed.
-   * @returns An async generator yielding Float32Array audio chunks.
-   */
-  public async *stream({
-    text,
-    speed,
-  }: TextToSpeechStreamingInput): AsyncGenerator<Float32Array> {
-    yield* this.streamImpl((cb) => this.nativeModule.stream(text, speed, cb));
-  }
-
   /**
    * Starts a streaming synthesis session from pre-computed phonemes.
    * Bypasses the built-in phonemizer, allowing use of external G2P systems.
@@ -213,16 +210,68 @@ export class TextToSpeechModule {
     phonemes,
     speed,
   }: TextToSpeechStreamingPhonemeInput): AsyncGenerator<Float32Array> {
-    yield* this.streamImpl((cb) =>
-      this.nativeModule.streamFromPhonemes(phonemes, speed, cb)
-    );
+    const queue: Float32Array[] = [];
+
+    let waiter: (() => void) | null = null;
+    let error: unknown;
+    let nativeStreamFinished = false;
+
+    this.isStreaming = true; // arm the loop below, which is gated on this flag
+
+    const wake = () => {
+      waiter?.();
+      waiter = null;
+    };
+
+    (async () => {
+      try {
+        await this.nativeModule.streamFromPhonemes(
+          phonemes,
+          speed,
+          (audio: number[]) => {
+            queue.push(new Float32Array(audio));
+            wake();
+          }
+        );
+        nativeStreamFinished = true;
+        wake();
+      } catch (e) {
+        error = e;
+        nativeStreamFinished = true;
+        wake();
+      }
+    })();
+
+    while (this.isStreaming) {
+      if (queue.length > 0) {
+        yield queue.shift()!;
+        continue;
+      }
+      if (error) throw error;
+      if (nativeStreamFinished) return; // drained and native call has settled
+      await new Promise<void>((r) => (waiter = r));
+    }
+  }
+
+  /**
+   * Inserts new text chunk into the buffer to be processed in streaming mode.
+   */
+  public streamInsert(textChunk: string): void {
+    this.nativeModule.streamInsert(textChunk);
   }
 
   /**
    * Stops the streaming process if there is any ongoing.
+   *
+   * @param instant If true, stops the streaming as soon as possible. Otherwise
+   *                allows the module to complete processing for the remains of the buffer.
    */
-  public streamStop(): void {
-    this.nativeModule.streamStop();
+  public streamStop(instant: boolean = true): void {
+    this.nativeModule.streamStop(instant);
+
+    if (instant) {
+      this.isStreaming = false;
+    }
   }
 
   /**
diff --git a/packages/react-native-executorch/src/types/tts.ts b/packages/react-native-executorch/src/types/tts.ts
index 097f35976a..e515d13b42 100644
--- a/packages/react-native-executorch/src/types/tts.ts
+++ b/packages/react-native-executorch/src/types/tts.ts
@@ -93,7 +93,7 @@ export interface TextToSpeechProps extends TextToSpeechConfig {
  * @property {number} [speed] - optional speed argument - the higher it is, the faster the speech becomes
  */
 export interface TextToSpeechInput {
-  text: string;
+  text?: string;
   speed?: number;
 }
 
@@ -179,10 +179,18 @@ export interface TextToSpeechType {
     input: TextToSpeechStreamingPhonemeInput
   ) => Promise<void>;
 
+  /**
+   * Inserts new text chunk into the buffer to be processed in streaming mode.
+   */
+  streamInsert: (textChunk: string) => void;
+
   /**
    * Interrupts and stops the currently active audio generation stream.
+   *
+   * @param instant If true, stops the streaming as soon as possible. Otherwise
+   *                allows the module to complete processing for the remains of the buffer.
    */
-  streamStop: () => void;
+  streamStop: (instant?: boolean) => void;
 }
 
 /**
@@ -207,10 +215,16 @@ export interface TextToSpeechStreamingCallbacks {
  * Actions such as playing the audio should happen within the onNext callback.
  * Callbacks can be both synchronous or asynchronous.
  *
+ * Supports incrementally extended input: new text chunks can be appended
+ * with streamInsert() while the stream is running.
+ *
  * @category Types
+ * @property {boolean} [stopAutomatically] - If true, streaming will stop automatically when the buffer is empty.
  */
 export interface TextToSpeechStreamingInput
-  extends TextToSpeechInput, TextToSpeechStreamingCallbacks {}
+  extends TextToSpeechInput, TextToSpeechStreamingCallbacks {
+  stopAutomatically?: boolean;
+}
 
 /**
  * Streaming input definition for pre-computed phonemes.
diff --git a/packages/react-native-executorch/third-party/android/libs/phonemis/arm64-v8a/libphonemis.a b/packages/react-native-executorch/third-party/android/libs/phonemis/arm64-v8a/libphonemis.a
index ab5d24b339..5a38707580 100644
Binary files a/packages/react-native-executorch/third-party/android/libs/phonemis/arm64-v8a/libphonemis.a and b/packages/react-native-executorch/third-party/android/libs/phonemis/arm64-v8a/libphonemis.a differ
diff --git a/packages/react-native-executorch/third-party/android/libs/phonemis/x86_64/libphonemis.a b/packages/react-native-executorch/third-party/android/libs/phonemis/x86_64/libphonemis.a
index 6f98f2aaca..2306d4647a 100644
Binary files a/packages/react-native-executorch/third-party/android/libs/phonemis/x86_64/libphonemis.a and b/packages/react-native-executorch/third-party/android/libs/phonemis/x86_64/libphonemis.a differ
diff --git a/packages/react-native-executorch/third-party/ios/libs/phonemis/physical-arm64-release/libphonemis.a b/packages/react-native-executorch/third-party/ios/libs/phonemis/physical-arm64-release/libphonemis.a
index 2b79e22edf..78f5169308 100644
Binary files a/packages/react-native-executorch/third-party/ios/libs/phonemis/physical-arm64-release/libphonemis.a and b/packages/react-native-executorch/third-party/ios/libs/phonemis/physical-arm64-release/libphonemis.a differ
diff --git a/packages/react-native-executorch/third-party/ios/libs/phonemis/simulator-arm64-debug/libphonemis.a b/packages/react-native-executorch/third-party/ios/libs/phonemis/simulator-arm64-debug/libphonemis.a
index c8dba0c3d4..ccf1d2fa64 100644
Binary files a/packages/react-native-executorch/third-party/ios/libs/phonemis/simulator-arm64-debug/libphonemis.a and b/packages/react-native-executorch/third-party/ios/libs/phonemis/simulator-arm64-debug/libphonemis.a differ