Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
namespace rnexecutorch::models::speech_to_text {

using namespace ::executorch::extension;
using namespace asr;
using namespace types;
using namespace stream;

SpeechToText::SpeechToText(const std::string &encoderSource,
const std::string &decoderSource,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ namespace rnexecutorch {

namespace models::speech_to_text {

using namespace asr;
using namespace types;
using namespace stream;

class SpeechToText {
public:
explicit SpeechToText(const std::string &encoderSource,
Expand All @@ -35,14 +31,14 @@ class SpeechToText {
std::unique_ptr<BaseModel> encoder;
std::unique_ptr<BaseModel> decoder;
std::unique_ptr<TokenizerModule> tokenizer;
std::unique_ptr<ASR> asr;
std::unique_ptr<asr::ASR> asr;

std::shared_ptr<OwningArrayBuffer>
makeOwningBuffer(std::span<const float> vectorView) const;

// Stream
std::shared_ptr<react::CallInvoker> callInvoker;
std::unique_ptr<OnlineASRProcessor> processor;
std::unique_ptr<stream::OnlineASRProcessor> processor;
bool isStreaming;
bool readyToProcess;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <random>
#include <sstream>

#include "ASR.h"
#include "executorch/extension/tensor/tensor_ptr.h"
Expand All @@ -8,6 +9,8 @@

namespace rnexecutorch::models::speech_to_text::asr {

using namespace types;

ASR::ASR(const models::BaseModel *encoder, const models::BaseModel *decoder,
const TokenizerModule *tokenizer)
: encoder(encoder), decoder(decoder), tokenizer(tokenizer),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@

namespace rnexecutorch::models::speech_to_text::asr {

using namespace types;

class ASR {
public:
explicit ASR(const models::BaseModel *encoder,
const models::BaseModel *decoder,
const TokenizerModule *tokenizer);
std::vector<Segment> transcribe(std::span<const float> waveform,
const DecodingOptions &options) const;
std::vector<types::Segment>
transcribe(std::span<const float> waveform,
const types::DecodingOptions &options) const;
std::vector<float> encode(std::span<const float> waveform) const;
std::vector<float> decode(std::span<int32_t> tokens,
std::span<float> encoderOutput) const;
Expand All @@ -43,16 +42,18 @@ class ASR {
// Number of mel frames output by the encoder (derived from input spectrogram)
constexpr static int32_t kNumFrames = 1500;

std::vector<int32_t> getInitialSequence(const DecodingOptions &options) const;
GenerationResult generate(std::span<const float> waveform, float temperature,
const DecodingOptions &options) const;
std::vector<Segment>
std::vector<int32_t>
getInitialSequence(const types::DecodingOptions &options) const;
types::GenerationResult generate(std::span<const float> waveform,
float temperature,
const types::DecodingOptions &options) const;
std::vector<types::Segment>
generateWithFallback(std::span<const float> waveform,
const DecodingOptions &options) const;
std::vector<Segment>
const types::DecodingOptions &options) const;
std::vector<types::Segment>
calculateWordLevelTimestamps(std::span<const int32_t> tokens,
std::span<const float> waveform) const;
std::vector<Word>
std::vector<types::Word>
estimateWordLevelTimestampsLinear(std::span<const int32_t> tokens,
int32_t start, int32_t end) const;
float getCompressionRatio(const std::string &text) const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace rnexecutorch::models::speech_to_text::stream {

using namespace types;

void HypothesisBuffer::insert(std::span<const Word> newWords, float offset) {
this->fresh.clear();
for (const auto &word : newWords) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,19 @@

namespace rnexecutorch::models::speech_to_text::stream {

using namespace types;

class HypothesisBuffer {
public:
void insert(std::span<const Word> newWords, float offset);
std::deque<Word> flush();
void insert(std::span<const types::Word> newWords, float offset);
std::deque<types::Word> flush();
void popCommitted(float time);
std::deque<Word> complete() const;
std::deque<types::Word> complete() const;

private:
float lastCommittedTime = 0.0f;

std::deque<Word> committedInBuffer;
std::deque<Word> buffer;
std::deque<Word> fresh;
std::deque<types::Word> committedInBuffer;
std::deque<types::Word> buffer;
std::deque<types::Word> fresh;
};

} // namespace rnexecutorch::models::speech_to_text::stream
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

namespace rnexecutorch::models::speech_to_text::stream {

using namespace asr;
using namespace types;

OnlineASRProcessor::OnlineASRProcessor(const ASR *asr) : asr(asr) {}

void OnlineASRProcessor::insertAudioChunk(std::span<const float> audio) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,28 @@

namespace rnexecutorch::models::speech_to_text::stream {

using namespace asr;
using namespace types;

class OnlineASRProcessor {
public:
explicit OnlineASRProcessor(const ASR *asr);
explicit OnlineASRProcessor(const asr::ASR *asr);

void insertAudioChunk(std::span<const float> audio);
ProcessResult processIter(const DecodingOptions &options);
types::ProcessResult processIter(const types::DecodingOptions &options);
std::string finish();

std::vector<float> audioBuffer;

private:
const ASR *asr;
const asr::ASR *asr;
constexpr static int32_t kSamplingRate = 16000;

HypothesisBuffer hypothesisBuffer;
float bufferTimeOffset = 0.0f;
std::vector<Word> committed;
std::vector<types::Word> committed;

void chunkCompletedSegment(std::span<const Segment> res);
void chunkCompletedSegment(std::span<const types::Segment> res);
void chunkAt(float time);

std::string toFlush(const std::deque<Word> &words) const;
std::string toFlush(const std::deque<types::Word> &words) const;
};

} // namespace rnexecutorch::models::speech_to_text::stream
2 changes: 1 addition & 1 deletion packages/react-native-executorch/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "react-native-executorch",
"version": "0.5.4",
"version": "0.5.5",
"description": "An easy way to run AI models in React Native with ExecuTorch",
"source": "./src/index.ts",
"main": "./lib/module/index.js",
Expand Down