Skip to content

Commit dad4a9d

Browse files
author
Mateusz Kopciński
committed
Fix corrupted UTF-8 encoding of special characters in transcription
1 parent 0cfc2e5 commit dad4a9d

File tree

4 files changed

+41
-14
lines changed

4 files changed

+41
-14
lines changed

packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,15 @@ inline jsi::Value getJsiValue(const std::vector<int32_t> &vec,
280280
return {runtime, array};
281281
}
282282

283+
/// Converts a buffer of raw bytes into a JS array of numbers.
///
/// Every element is emitted as its unsigned byte value (0-255). Whether plain
/// `char` is signed is implementation-defined, so promoting it directly would
/// produce negative numbers for bytes >= 0x80 on some targets and positive
/// numbers on others; going through `unsigned char` makes the array contents
/// identical on every platform.
///
/// @param vec     Bytes to expose to JS, one array element per byte.
/// @param runtime JSI runtime used to create the array.
/// @return A jsi::Value wrapping an array of `vec.size()` numbers.
inline jsi::Value getJsiValue(const std::vector<char> &vec,
                              jsi::Runtime &runtime) {
  jsi::Array array(runtime, vec.size());
  for (size_t i = 0; i < vec.size(); i++) {
    // Reinterpret as unsigned first so the widening to int never sign-extends.
    const auto byte = static_cast<unsigned char>(vec[i]);
    array.setValueAtIndex(runtime, i, jsi::Value(static_cast<int>(byte)));
  }
  return {runtime, array};
}
291+
283292
/// Wraps a native `int` as a JS number value.
///
/// @param val     Integer to convert.
/// @param runtime JSI runtime the resulting value is created for.
/// @return A jsi::Value holding `val`.
inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
  // Spell out the int -> jsi::Value conversion that the braced form did
  // implicitly, then copy it into the result through the runtime.
  const jsi::Value number(val);
  return jsi::Value(runtime, number);
}

packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <thread>
22

33
#include "SpeechToText.h"
4+
#include <vector>
45

56
namespace rnexecutorch::models::speech_to_text {
67

@@ -41,7 +42,7 @@ SpeechToText::decode(std::span<int32_t> tokens,
4142
return std::make_shared<OwningArrayBuffer>(decoderOutput);
4243
}
4344

44-
std::string SpeechToText::transcribe(std::span<float> waveform,
45+
std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
4546
std::string languageOption) const {
4647
std::vector<Segment> segments =
4748
this->asr->transcribe(waveform, DecodingOptions(languageOption));
@@ -60,7 +61,9 @@ std::string SpeechToText::transcribe(std::span<float> waveform,
6061
transcription += word.content;
6162
}
6263
}
63-
return transcription;
64+
65+
std::vector<char> charVector(transcription.begin(), transcription.end());
66+
return charVector;
6467
}
6568

6669
size_t SpeechToText::getMemoryLowerBound() const noexcept {
@@ -74,13 +77,13 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
7477
throw std::runtime_error("Streaming is already in progress");
7578
}
7679

77-
auto nativeCallback = [this, callback](const std::string &committed,
78-
const std::string &nonCommitted,
80+
auto nativeCallback = [this, callback](const std::vector<char> &committedVec,
81+
const std::vector<char> &nonCommittedVec,
7982
bool isDone) {
8083
this->callInvoker->invokeAsync(
81-
[callback, committed, nonCommitted, isDone](jsi::Runtime &rt) {
82-
callback->call(rt, jsi::String::createFromUtf8(rt, committed),
83-
jsi::String::createFromUtf8(rt, nonCommitted),
84+
[callback, committedVec, nonCommittedVec, isDone](jsi::Runtime &rt) {
85+
callback->call(rt, rnexecutorch::jsi_conversion::getJsiValue(committedVec, rt),
86+
rnexecutorch::jsi_conversion::getJsiValue(nonCommittedVec, rt),
8487
jsi::Value(isDone));
8588
});
8689
};
@@ -94,12 +97,18 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
9497
}
9598
ProcessResult res =
9699
this->processor->processIter(DecodingOptions(languageOption));
97-
nativeCallback(res.committed, res.nonCommitted, false);
100+
101+
std::vector<char> committedVec(res.committed.begin(), res.committed.end());
102+
std::vector<char> nonCommittedVec(res.nonCommitted.begin(), res.nonCommitted.end());
103+
104+
nativeCallback(committedVec, nonCommittedVec, false);
98105
this->readyToProcess = false;
99106
}
100107

101108
std::string committed = this->processor->finish();
102-
nativeCallback(committed, "", true);
109+
std::vector<char> committedVec(committed.begin(), committed.end());
110+
111+
nativeCallback(committedVec, {}, true);
103112

104113
this->resetStreamState();
105114
}

packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/SpeechToText.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h"
4+
#include <vector>
45

56
namespace rnexecutorch {
67

@@ -17,7 +18,7 @@ class SpeechToText {
1718
std::shared_ptr<OwningArrayBuffer> encode(std::span<float> waveform) const;
1819
std::shared_ptr<OwningArrayBuffer>
1920
decode(std::span<int32_t> tokens, std::span<float> encoderOutput) const;
20-
std::string transcribe(std::span<float> waveform,
21+
std::vector<char> transcribe(std::span<float> waveform,
2122
std::string languageOption) const;
2223

2324
size_t getMemoryLowerBound() const noexcept;

packages/react-native-executorch/src/modules/natural_language_processing/SpeechToTextModule.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ export class SpeechToTextModule {
77

88
private modelConfig!: SpeechToTextModelConfig;
99

10+
private textDecoder = new TextDecoder("utf-8", {
11+
fatal: false,
12+
ignoreBOM: true,
13+
});
14+
1015
public async load(
1116
model: SpeechToTextModelConfig,
1217
onDownloadProgressCallback: (progress: number) => void = () => {}
@@ -87,8 +92,8 @@ export class SpeechToTextModule {
8792
);
8893
waveform = new Float32Array(waveform);
8994
}
90-
91-
return this.nativeModule.transcribe(waveform, options.language || '');
95+
const transcriptionBytes = await this.nativeModule.transcribe(waveform, options.language || '');
96+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
9297
}
9398

9499
public async *stream(
@@ -109,8 +114,11 @@ export class SpeechToTextModule {
109114
(async () => {
110115
try {
111116
await this.nativeModule.stream(
112-
(committed: string, nonCommitted: string, isDone: boolean) => {
113-
queue.push({ committed, nonCommitted });
117+
(committed: number[], nonCommitted: number[], isDone: boolean) => {
118+
queue.push({
119+
committed: this.textDecoder.decode(new Uint8Array(committed)),
120+
nonCommitted: this.textDecoder.decode(new Uint8Array(nonCommitted))
121+
});
114122
if (isDone) {
115123
finished = true;
116124
}

0 commit comments

Comments
 (0)