Skip to content

Commit 26f88f6

Browse files
authored
fix: add inference mutex to Text Embedding and Text-to-Image (#1060)
## Description Adds thread-safety to Text Embeddings and Text-to-Image models mirroring what was already done for models inheriting from VisionModel and VAD. ### Introduces a breaking change? - [ ] Yes - [x] No ### Type of change - [x] Bug fix (change which fixes an issue) - [ ] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [ ] Other (chores, tests, code style improvements etc.) ### Tested on - [x] iOS - [x] Android ### Testing instructions Use the following app screen and try to trigger the race condition before fix and verify that it doesn't occur after applying the fix. You can use `adb logcat | grep -E "FATAL|SIGSEGV|backtrace"` to observe the error on Android. ```ts import React, { useState } from 'react'; import { Button, ScrollView, Text, View } from 'react-native'; import { BK_SDM_TINY_VPRED_512, TextToImageModule, } from 'react-native-executorch'; import { CLIP_VIT_BASE_PATCH32_TEXT, TextEmbeddingsModule, } from 'react-native-executorch'; import { FSMN_VAD, VADModule } from 'react-native-executorch'; const DELAY_MS = 50; // tune this so that forward() is running when delete() is called const MODEL_VAD = { name: 'VAD', load: (onProgress: (p: number) => void) => VADModule.fromModelName(FSMN_VAD, onProgress), input: () => new Float32Array(16000 * 300), }; const MODEL_TEXT_EMBEDDINGS = { name: 'TextEmbeddings', load: (onProgress: (p: number) => void) => TextEmbeddingsModule.fromModelName(CLIP_VIT_BASE_PATCH32_TEXT, onProgress), input: () => 'hello world', }; const MODEL_TEXT_TO_IMAGE = { name: 'TextToImage', load: (onProgress: (p: number) => void) => TextToImageModule.fromModelName(BK_SDM_TINY_VPRED_512, onProgress), input: () => 'a red apple', }; const MODEL = MODEL_TEXT_EMBEDDINGS; export default function RaceTest() { const [lines, setLines] = useState<string[]>([]); const [downloadProgress, setDownloadProgress] = useState<number | null>(null); const log = (line: string) => 
setLines((prev) => [line, ...prev]); const run = async () => { setLines([]); setDownloadProgress(null); log(`model: ${MODEL.name}`); log('loading'); const model = await MODEL.load((p) => setDownloadProgress(p)); setDownloadProgress(null); log('running forward()'); const result = model.forward(MODEL.input()); log(`waiting ${DELAY_MS} ms`); await new Promise((r) => setTimeout(r, DELAY_MS)); log('calling delete()'); model.delete(); try { await result; log('forward() completed successfully'); } catch (e: any) { log('error: ' + (e?.message ?? String(e))); } }; return ( <View> <Button title="Run Race Test" onPress={run} /> {downloadProgress !== null && ( <Text>downloading: {Math.round(downloadProgress * 100)}%</Text> )} <ScrollView> {lines.map((l, i) => ( <Text key={i}>{l}</Text> ))} </ScrollView> </View> ); } ``` ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues #1055 ### Checklist - [x] I have performed a self-review of my code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [x] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->
1 parent cb032c6 commit 26f88f6

File tree

5 files changed: +16 −1 lines changed

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 3 additions & 1 deletion
```diff
@@ -375,7 +375,9 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
       // We need to dispatch a thread if we want the function to be
       // asynchronous. In this thread all accesses to jsi::Runtime need to
       // be done via the callInvoker.
-      threads::GlobalThreadPool::detach([this, promise,
+      threads::GlobalThreadPool::detach([model = this->model,
+                                         callInvoker = this->callInvoker,
+                                         promise,
                                          argsConverted =
                                              std::move(argsConverted)]() {
         try {
```

packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp

Lines changed: 6 additions & 0 deletions
```diff
@@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
   return {.inputIds = inputIds64, .attentionMask = attentionMask};
 }
 
+void TextEmbeddings::unload() noexcept {
+  std::scoped_lock lock(inference_mutex_);
+  BaseModel::unload();
+}
+
 std::shared_ptr<OwningArrayBuffer>
 TextEmbeddings::generate(const std::string input) {
+  std::scoped_lock lock(inference_mutex_);
   auto preprocessed = preprocess(input);
 
   std::vector<int32_t> tokenIdsShape = {
```

packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
+#include <mutex>
 #include <rnexecutorch/TokenizerModule.h>
 #include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
 
@@ -20,8 +21,10 @@ class TextEmbeddings final : public BaseEmbeddings {
   [[nodiscard(
       "Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
   generate(const std::string input);
+  void unload() noexcept;
 
 private:
+  mutable std::mutex inference_mutex_;
   std::vector<std::vector<int32_t>> inputShapes;
   TokenIdsWithAttentionMask preprocess(const std::string &input);
   std::unique_ptr<TokenizerModule> tokenizer;
```

packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp

Lines changed: 2 additions & 0 deletions
```diff
@@ -58,6 +58,7 @@ std::shared_ptr<OwningArrayBuffer>
 TextToImage::generate(std::string input, int32_t imageSize,
                       size_t numInferenceSteps, int32_t seed,
                       std::shared_ptr<jsi::Function> callback) {
+  std::scoped_lock lock(inference_mutex_);
   setImageSize(imageSize);
   setSeed(seed);
 
@@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept {
 }
 
 void TextToImage::unload() noexcept {
+  std::scoped_lock lock(inference_mutex_);
   encoder->unload();
   unet->unload();
   decoder->unload();
```

packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include <string>
 #include <vector>
 
@@ -49,6 +50,7 @@ class TextToImage final {
   static constexpr float guidanceScale = 7.5f;
   static constexpr float latentsScale = 0.18215f;
   bool interrupted = false;
+  mutable std::mutex inference_mutex_;
 
   std::shared_ptr<react::CallInvoker> callInvoker;
   std::unique_ptr<Scheduler> scheduler;
```

0 commit comments

Comments (0)