11#include " HypothesisBuffer.h"
2+ #include " Params.h"
3+ #include " Utils.h"
4+ #include < cmath>
5+ #include < rnexecutorch/Log.h>
26
37namespace rnexecutorch ::models::speech_to_text::whisper::stream {
48
59void HypothesisBuffer::insert (std::span<const Word> newWords, float offset) {
10+ rnexecutorch::log (rnexecutorch::LOG_LEVEL::Info,
11+ " [HypothesisBuffer] Inserting " +
12+ std::to_string (newWords.size ()) +
13+ " words with offset " + std::to_string (offset) + " s." );
14+
615 fresh_.clear ();
716 for (const auto &word : newWords) {
817 const float newStart = word.start + offset;
9- if (newStart > lastCommittedTime_ - 0 .5f ) {
18+ // Only accept words that start after or near the last committed time to
19+ // avoid stale data
20+ if (newStart > lastCommittedTime_ - params::kStreamFreshThreshold ) {
1021 fresh_.emplace_back (word.content , newStart, word.end + offset);
1122 }
1223 }
24+ rnexecutorch::log (rnexecutorch::LOG_LEVEL::Info,
25+ " [HypothesisBuffer] Filtered " +
26+ std::to_string (fresh_.size ()) +
27+ " words into 'fresh' buffer." );
1328
1429 if (!fresh_.empty () && !committedInBuffer_.empty ()) {
1530 const float a = fresh_.front ().start ;
16- if (std::fabs (a - lastCommittedTime_) < 1 .0f ) {
31+ // Check for overlap with already committed history to avoid duplicates in
32+ // the stream
33+ if (std::fabs (a - lastCommittedTime_) < 2 .0f ) {
1734 const size_t cn = committedInBuffer_.size ();
1835 const size_t nn = fresh_.size ();
19- const std::size_t maxCheck = std::min<std::size_t >({cn, nn, 5 });
20- for (size_t i = 1 ; i <= maxCheck; i++) {
21- std::string c;
22- for (auto it = committedInBuffer_.cend () - i;
23- it != committedInBuffer_.cend (); ++it) {
24- if (!c.empty ()) {
25- c += ' ' ;
26- }
27- c += it->content ;
28- }
29-
30- std::string tail;
31- auto it = fresh_.cbegin ();
32- for (size_t k = 0 ; k < i; k++, it++) {
33- if (!tail.empty ()) {
34- tail += ' ' ;
35- }
36- tail += it->content ;
37- }
38-
39- if (c == tail) {
40- fresh_.erase (fresh_.begin (), fresh_.begin () + i);
41- break ;
42- }
36+
37+ rnexecutorch::log (
38+ rnexecutorch::LOG_LEVEL::Info,
39+ " [HypothesisBuffer] Checking for overlap. cn=" + std::to_string (cn) +
40+ " , nn=" + std::to_string (nn) +
41+ " , maxCheck=" + std::to_string (params::kStreamMaxOverlapSize ));
42+
43+ size_t overlapSize = utils::findLargestOverlapingFragment (
44+ committedInBuffer_, fresh_, params::kStreamMaxOverlapSize ,
45+ params::kStreamMaxOverlapTimestampDiff );
46+
47+ if (overlapSize > 0 ) {
48+ rnexecutorch::log (rnexecutorch::LOG_LEVEL::Info,
49+ " [HypothesisBuffer] Detected overlap of " +
50+ std::to_string (overlapSize) +
51+ " words with committed history. Erasing "
52+ " duplicates from 'fresh'." );
53+ fresh_.erase (fresh_.begin (), fresh_.begin () + overlapSize);
4354 }
4455 }
4556 }
@@ -48,6 +59,8 @@ void HypothesisBuffer::insert(std::span<const Word> newWords, float offset) {
4859std::deque<Word> HypothesisBuffer::flush () {
4960 std::deque<Word> commit;
5061
62+ // Find stable prefix: words that haven't changed between last and current
63+ // iteration
5164 while (!fresh_.empty () && !buffer_.empty ()) {
5265 if (fresh_.front ().content != buffer_.front ().content ) {
5366 break ;
@@ -59,19 +72,36 @@ std::deque<Word> HypothesisBuffer::flush() {
5972
6073 if (!commit.empty ()) {
6174 lastCommittedTime_ = commit.back ().end ;
75+ rnexecutorch::log (rnexecutorch::LOG_LEVEL::Info,
76+ " [HypothesisBuffer] Found stable prefix. Committing " +
77+ std::to_string (commit.size ()) +
78+ " words. New lastCommittedTime: " +
79+ std::to_string (lastCommittedTime_) + " s." );
6280 }
6381
82+ // Current 'fresh' (remaining) becomes the new 'buffer' for next iteration
83+ // comparison
6484 buffer_ = std::move (fresh_);
6585 fresh_.clear ();
86+
6687 committedInBuffer_.insert (committedInBuffer_.end (), commit.begin (),
6788 commit.end ());
89+
6890 return commit;
6991}
7092
7193void HypothesisBuffer::popCommitted (float time) {
94+ size_t count = 0 ;
7295 while (!committedInBuffer_.empty () &&
7396 committedInBuffer_.front ().end <= time) {
7497 committedInBuffer_.pop_front ();
98+ count++;
99+ }
100+ if (count > 0 ) {
101+ rnexecutorch::log (rnexecutorch::LOG_LEVEL::Info,
102+ " [HypothesisBuffer] Popped " + std::to_string (count) +
103+ " old words from committed history up to " +
104+ std::to_string (time) + " s." );
75105 }
76106}
77107
@@ -81,6 +111,8 @@ void HypothesisBuffer::reset() {
81111 buffer_.clear ();
82112 fresh_.clear ();
83113 committedInBuffer_.clear ();
114+
115+ lastCommittedTime_ = 0 .f ;
84116}
85117
86118} // namespace rnexecutorch::models::speech_to_text::whisper::stream
0 commit comments