Skip to content

Commit 7b1e6ff

Browse files
committed
Remove special tokens
1 parent f42351b commit 7b1e6ff

File tree

2 files changed

+10
-7
lines changed
  • packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/whisper

2 files changed

+10
-7
lines changed

packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/whisper/ASR.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,6 @@ std::vector<Segment> ASR::calculateWordLevelTimestamps(
344344
if (words.size()) {
345345
Segment seg;
346346
seg.words = std::move(words);
347-
// seg.tokens = {}; // WTF ?
348347
seg.tokens = tokens;
349348
seg.avgLogprob = avgLogProb;
350349
seg.temperature = temperature;
@@ -409,8 +408,12 @@ ASR::estimateWordLevelTimestampsLinear(std::span<const uint64_t> tokens,
409408
std::vector<std::string> wordsStr;
410409
std::string word;
411410
while (iss >> word) {
412-
wordsStr.emplace_back(" ");
413-
wordsStr.back().append(word);
411+
// Detect special tokens such as [BLANK_AUDIO] by searching for square
412+
// bracket
413+
if (word.find('[') == std::string::npos) {
414+
wordsStr.emplace_back(" ");
415+
wordsStr.back().append(word);
416+
}
414417
}
415418

416419
size_t numChars = 0;

packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/whisper/OnlineASR.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -92,9 +92,9 @@ ProcessResult OnlineASR::process(const DecodingOptions &options) {
9292
// (assuming some fixed words per second frequency).
9393
const float freshDuration = newEnd - establishedEnd;
9494
const float epsilon = std::max(
95-
0.F, 0.8F * (freshDuration -
96-
static_cast<float>(noNewWords /
97-
params::kStreamWordsPerSecond)));
95+
0.F, 0.85F * (freshDuration -
96+
static_cast<float>(noNewWords /
97+
params::kStreamWordsPerSecond)));
9898
const float beforeScaleStart = hypothesisBuffer_.fresh_[i].start;
9999
const float beforeScaleEnd = hypothesisBuffer_.fresh_[i].end;
100100
float scale = (freshDuration - epsilon) / (newEnd - newBegin);
@@ -134,7 +134,7 @@ ProcessResult OnlineASR::process(const DecodingOptions &options) {
134134
std::vector<Word> OnlineASR::finish() {
135135
// We always push the last remaining hypothesis, even if it's not
136136
// confirmed in second iteration.
137-
auto remaining = hypothesisBuffer_.hypothesis_;
137+
std::deque<Word> remaining = hypothesisBuffer_.hypothesis_;
138138

139139
reset();
140140

0 commit comments

Comments
 (0)