Skip to content

Commit c168535

Browse files
msluszniak, radko93, barhanc, and chmjkb
authored
chore: Release v0.8.3 (#1072)
## Summary Patch release v0.8.3 — cherry-picks the following bug fixes from `main` into `release/0.8`: - fix: add mutex to VoiceActivityDetection to prevent race between `generate()` and `unload()` (#1056) - fix: prevent apps from crashing when LLMs are loaded (#1063) - fix: add inference mutex to Text Embedding and Text-to-Image (#1060) ## Checklist - [x] Commits cherry-picked from `main` in chronological order - [x] Version bumped to `0.8.3` in `packages/react-native-executorch/package.json` 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Radek Czemerys <7029942+radko93@users.noreply.github.com> Co-authored-by: Bartosz Hanc <bartosz.hanc02@gmail.com> Co-authored-by: Jakub Chmura <92989966+chmjkb@users.noreply.github.com>
1 parent 2c17be8 commit c168535

File tree

9 files changed

+35
-5
lines changed

9 files changed

+35
-5
lines changed

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,9 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
375375
// We need to dispatch a thread if we want the function to be
376376
// asynchronous. In this thread all accesses to jsi::Runtime need to
377377
// be done via the callInvoker.
378-
threads::GlobalThreadPool::detach([this, promise,
378+
threads::GlobalThreadPool::detach([model = this->model,
379+
callInvoker = this->callInvoker,
380+
promise,
379381
argsConverted =
380382
std::move(argsConverted)]() {
381383
try {

packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
3535
return {.inputIds = inputIds64, .attentionMask = attentionMask};
3636
}
3737

38+
void TextEmbeddings::unload() noexcept {
39+
std::scoped_lock lock(inference_mutex_);
40+
BaseModel::unload();
41+
}
42+
3843
std::shared_ptr<OwningArrayBuffer>
3944
TextEmbeddings::generate(const std::string input) {
45+
std::scoped_lock lock(inference_mutex_);
4046
auto preprocessed = preprocess(input);
4147

4248
std::vector<int32_t> tokenIdsShape = {

packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
4+
#include <mutex>
45
#include <rnexecutorch/TokenizerModule.h>
56
#include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
67

@@ -20,8 +21,10 @@ class TextEmbeddings final : public BaseEmbeddings {
2021
[[nodiscard(
2122
"Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
2223
generate(const std::string input);
24+
void unload() noexcept;
2325

2426
private:
27+
mutable std::mutex inference_mutex_;
2528
std::vector<std::vector<int32_t>> inputShapes;
2629
TokenIdsWithAttentionMask preprocess(const std::string &input);
2730
std::unique_ptr<TokenizerModule> tokenizer;

packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ using executorch::runtime::Error;
2020
LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
2121
std::vector<std::string> capabilities,
2222
std::shared_ptr<react::CallInvoker> callInvoker)
23-
: BaseModel(modelSource, callInvoker, Module::LoadMode::File) {
23+
: BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) {
2424

2525
if (capabilities.empty()) {
2626
runner_ =
@@ -42,8 +42,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
4242
throw RnExecutorchError(loadResult, "Failed to load LLM runner");
4343
}
4444

45-
memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
46-
fs::file_size(fs::path(tokenizerSource));
45+
// I am purposefully not adding file size of the model here. The reason is
46+
// that Hermes would crash the app if we try to alloc too much memory here.
47+
// Also, given we're using mmap, the true memory consumption of a model is not
48+
// really equal to the size of the model. The size of the tokenizer file is a
49+
// hint to the GC that this object might be worth getting rid of.
50+
memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource));
4751
}
4852

4953
std::string LLM::generate(std::string input,

packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ std::shared_ptr<OwningArrayBuffer>
5858
TextToImage::generate(std::string input, int32_t imageSize,
5959
size_t numInferenceSteps, int32_t seed,
6060
std::shared_ptr<jsi::Function> callback) {
61+
std::scoped_lock lock(inference_mutex_);
6162
setImageSize(imageSize);
6263
setSeed(seed);
6364

@@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept {
137138
}
138139

139140
void TextToImage::unload() noexcept {
141+
std::scoped_lock lock(inference_mutex_);
140142
encoder->unload();
141143
unet->unload();
142144
decoder->unload();

packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <memory>
4+
#include <mutex>
45
#include <string>
56
#include <vector>
67

@@ -49,6 +50,7 @@ class TextToImage final {
4950
static constexpr float guidanceScale = 7.5f;
5051
static constexpr float latentsScale = 0.18215f;
5152
bool interrupted = false;
53+
mutable std::mutex inference_mutex_;
5254

5355
std::shared_ptr<react::CallInvoker> callInvoker;
5456
std::unique_ptr<Scheduler> scheduler;

packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,14 @@ VoiceActivityDetection::preprocess(std::span<float> waveform) const {
5454
return frameBuffer;
5555
}
5656

57+
void VoiceActivityDetection::unload() noexcept {
58+
std::scoped_lock lock(inference_mutex_);
59+
BaseModel::unload();
60+
}
61+
5762
std::vector<types::Segment>
5863
VoiceActivityDetection::generate(std::span<float> waveform) const {
64+
std::scoped_lock lock(inference_mutex_);
5965

6066
auto windowedInput = preprocess(waveform);
6167
auto [chunksNumber, remainder] = std::div(

packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <executorch/extension/tensor/tensor.h>
66
#include <executorch/extension/tensor/tensor_ptr.h>
77
#include <executorch/runtime/core/evalue.h>
8+
#include <mutex>
89
#include <span>
910

1011
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
@@ -23,7 +24,11 @@ class VoiceActivityDetection : public BaseModel {
2324
[[nodiscard("Registered non-void function")]] std::vector<types::Segment>
2425
generate(std::span<float> waveform) const;
2526

27+
void unload() noexcept;
28+
2629
private:
30+
mutable std::mutex inference_mutex_;
31+
2732
std::vector<std::array<float, constants::kPaddedWindowSize>>
2833
preprocess(std::span<float> waveform) const;
2934
std::vector<types::Segment> postprocess(const std::vector<float> &scores,

packages/react-native-executorch/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "react-native-executorch",
3-
"version": "0.8.2",
3+
"version": "0.8.3",
44
"description": "An easy way to run AI models in React Native with ExecuTorch",
55
"source": "./src/index.ts",
66
"main": "./lib/module/index.js",

0 commit comments

Comments (0)