From 27f7919e3c259226de274a45cb5d36b3a0e94642 Mon Sep 17 00:00:00 2001
From: Jakub Mroz <115979017+jakmro@users.noreply.github.com>
Date: Mon, 15 Sep 2025 12:45:29 +0200
Subject: [PATCH 1/2] fix: add sstream include and update namespace usage
 (#601)

## Description

Changes:
- add missing `<sstream>` include
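- move the `using namespace` directives (`asr`, `types`, `stream`) out of
  the speech-to-text headers into the corresponding `.cpp` files, and
  qualify the affected type names in the headers explicitly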

### Introduces a breaking change?

- [ ] Yes
- [x] No

### Type of change

- [x] Bug fix (change which fixes an issue)
- [ ] New feature (change which adds functionality)
- [ ] Documentation update (improves or adds clarity to existing
documentation)
- [x] Other (chores, tests, code style improvements etc.)

### Tested on

- [x] iOS
- [x] Android

### Checklist

- [x] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have updated the documentation accordingly
- [x] My changes generate no new warnings
---
 .../models/speech_to_text/SpeechToText.cpp    |  3 +++
 .../models/speech_to_text/SpeechToText.h      |  8 ++-----
 .../models/speech_to_text/asr/ASR.cpp         |  3 +++
 .../models/speech_to_text/asr/ASR.h           | 23 ++++++++++---------
 .../stream/HypothesisBuffer.cpp               |  2 ++
 .../speech_to_text/stream/HypothesisBuffer.h  | 14 +++++------
 .../stream/OnlineASRProcessor.cpp             |  3 +++
 .../stream/OnlineASRProcessor.h               | 15 +++++-------
 8 files changed, 37 insertions(+), 34 deletions(-)
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp
index d444b9c914..e7d485c3b9 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp
@@ -5,6 +5,9 @@
 namespace rnexecutorch::models::speech_to_text {
 
 using namespace ::executorch::extension;
+using namespace asr;
+using namespace types;
+using namespace stream;
 
 SpeechToText::SpeechToText(const std::string &encoderSource,
                            const std::string &decoderSource,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h
index a6f3779e4d..d28e80d0db 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h
@@ -6,10 +6,6 @@ namespace rnexecutorch {
 
 namespace models::speech_to_text {
 
-using namespace asr;
-using namespace types;
-using namespace stream;
-
 class SpeechToText {
 public:
   explicit SpeechToText(const std::string &encoderSource,
@@ -35,14 +31,14 @@ class SpeechToText {
   std::unique_ptr<BaseModel> encoder;
   std::unique_ptr<BaseModel> decoder;
   std::unique_ptr<TokenizerModule> tokenizer;
-  std::unique_ptr<ASR> asr;
+  std::unique_ptr<asr::ASR> asr;
 
   std::shared_ptr<OwningArrayBuffer>
   makeOwningBuffer(std::span<const float> vectorView) const;
 
   // Stream
   std::shared_ptr<react::CallInvoker> callInvoker;
-  std::unique_ptr<OnlineASRProcessor> processor;
+  std::unique_ptr<stream::OnlineASRProcessor> processor;
   bool isStreaming;
   bool readyToProcess;
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp
index 5a56f2d7e9..d0f965cb39 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp
@@ -1,4 +1,5 @@
 #include <random>
+#include <sstream>
 
 #include "ASR.h"
 #include "executorch/extension/tensor/tensor_ptr.h"
@@ -8,6 +9,8 @@
 
 namespace rnexecutorch::models::speech_to_text::asr {
 
+using namespace types;
+
 ASR::ASR(const models::BaseModel *encoder, const models::BaseModel *decoder,
          const TokenizerModule *tokenizer)
     : encoder(encoder), decoder(decoder), tokenizer(tokenizer),
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
index 605052363f..20180ebe46 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
@@ -8,15 +8,14 @@
 
 namespace rnexecutorch::models::speech_to_text::asr {
 
-using namespace types;
-
 class ASR {
 public:
   explicit ASR(const models::BaseModel *encoder,
                const models::BaseModel *decoder,
                const TokenizerModule *tokenizer);
-  std::vector<Segment> transcribe(std::span<const float> waveform,
-                                  const DecodingOptions &options) const;
+  std::vector<types::Segment>
+  transcribe(std::span<const float> waveform,
+             const types::DecodingOptions &options) const;
   std::vector<float> encode(std::span<const float> waveform) const;
   std::vector<float> decode(std::span<int32_t> tokens,
                             std::span<float> encoderOutput) const;
@@ -43,16 +42,18 @@ class ASR {
   // Number of mel frames output by the encoder (derived from input spectrogram)
   constexpr static int32_t kNumFrames = 1500;
 
-  std::vector<int32_t> getInitialSequence(const DecodingOptions &options) const;
-  GenerationResult generate(std::span<const float> waveform, float temperature,
-                            const DecodingOptions &options) const;
-  std::vector<Segment>
+  std::vector<int32_t>
+  getInitialSequence(const types::DecodingOptions &options) const;
+  types::GenerationResult generate(std::span<const float> waveform,
+                                   float temperature,
+                                   const types::DecodingOptions &options) const;
+  std::vector<types::Segment>
   generateWithFallback(std::span<const float> waveform,
-                       const DecodingOptions &options) const;
-  std::vector<Segment>
+                       const types::DecodingOptions &options) const;
+  std::vector<types::Segment>
   calculateWordLevelTimestamps(std::span<const int32_t> tokens,
                                std::span<const float> waveform) const;
-  std::vector<Word>
+  std::vector<types::Word>
   estimateWordLevelTimestampsLinear(std::span<const int32_t> tokens,
                                     int32_t start, int32_t end) const;
   float getCompressionRatio(const std::string &text) const;
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp
index 3e4d6a7cab..31806c1268 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp
@@ -2,6 +2,8 @@
 
 namespace rnexecutorch::models::speech_to_text::stream {
 
+using namespace types;
+
 void HypothesisBuffer::insert(std::span<const Word> newWords, float offset) {
   this->fresh.clear();
   for (const auto &word : newWords) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h
index ea4e73328e..cfa11fd665 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h
@@ -7,21 +7,19 @@
 
 namespace rnexecutorch::models::speech_to_text::stream {
 
-using namespace types;
-
 class HypothesisBuffer {
 public:
-  void insert(std::span<const Word> newWords, float offset);
-  std::deque<Word> flush();
+  void insert(std::span<const types::Word> newWords, float offset);
+  std::deque<types::Word> flush();
   void popCommitted(float time);
-  std::deque<Word> complete() const;
+  std::deque<types::Word> complete() const;
 
 private:
   float lastCommittedTime = 0.0f;
 
-  std::deque<Word> committedInBuffer;
-  std::deque<Word> buffer;
-  std::deque<Word> fresh;
+  std::deque<types::Word> committedInBuffer;
+  std::deque<types::Word> buffer;
+  std::deque<types::Word> fresh;
 };
 
 } // namespace rnexecutorch::models::speech_to_text::stream
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp
index 63cffd67cd..c6a99e9a2a 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp
@@ -4,6 +4,9 @@
 
 namespace rnexecutorch::models::speech_to_text::stream {
 
+using namespace asr;
+using namespace types;
+
 OnlineASRProcessor::OnlineASRProcessor(const ASR *asr) : asr(asr) {}
 
 void OnlineASRProcessor::insertAudioChunk(std::span<const float> audio) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h
index 403cf87d1a..c50b562711 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h
@@ -6,31 +6,28 @@
 
 namespace rnexecutorch::models::speech_to_text::stream {
 
-using namespace asr;
-using namespace types;
-
 class OnlineASRProcessor {
 public:
-  explicit OnlineASRProcessor(const ASR *asr);
+  explicit OnlineASRProcessor(const asr::ASR *asr);
 
   void insertAudioChunk(std::span<const float> audio);
-  ProcessResult processIter(const DecodingOptions &options);
+  types::ProcessResult processIter(const types::DecodingOptions &options);
   std::string finish();
 
   std::vector<float> audioBuffer;
 
 private:
-  const ASR *asr;
+  const asr::ASR *asr;
   constexpr static int32_t kSamplingRate = 16000;
 
   HypothesisBuffer hypothesisBuffer;
   float bufferTimeOffset = 0.0f;
-  std::vector<Word> committed;
+  std::vector<types::Word> committed;
 
-  void chunkCompletedSegment(std::span<const Segment> res);
+  void chunkCompletedSegment(std::span<const types::Segment> res);
   void chunkAt(float time);
 
-  std::string toFlush(const std::deque<Word> &words) const;
+  std::string toFlush(const std::deque<types::Word> &words) const;
 };
 
 } // namespace rnexecutorch::models::speech_to_text::stream

From 417857f648edf12b915278dff207348963de91f6 Mon Sep 17 00:00:00 2001
From: jakmro <jakub.mroz@swmansion.com>
Date: Mon, 15 Sep 2025 12:50:25 +0200
Subject: [PATCH 2/2] bump react-native-executorch version to 0.5.5

---
 packages/react-native-executorch/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/package.json b/packages/react-native-executorch/package.json
index 7807e420eb..11f433b5fd 100644
--- a/packages/react-native-executorch/package.json
+++ b/packages/react-native-executorch/package.json
@@ -1,6 +1,6 @@
 {
   "name": "react-native-executorch",
-  "version": "0.5.4",
+  "version": "0.5.5",
   "description": "An easy way to run AI models in React Native with ExecuTorch",
   "source": "./src/index.ts",
   "main": "./lib/module/index.js",