Skip to content

Commit 6b88b4b

Browse files
authored
feat: port TokenizerModule to C++ (#393)
## Description <!-- Provide a concise and descriptive summary of the changes implemented in this PR. --> ### Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation update (improves or adds clarity to existing documentation) ### Tested on - [ ] iOS - [ ] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [ ] I have performed a self-review of my code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [ ] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->
1 parent f2939db commit 6b88b4b

File tree

20 files changed

+406
-55
lines changed

20 files changed

+406
-55
lines changed

packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "RnExecutorchInstaller.h"
22

3+
#include <rnexecutorch/TokenizerModule.h>
34
#include <rnexecutorch/host_objects/JsiConversions.h>
45
#include <rnexecutorch/models/classification/Classification.h>
56
#include <rnexecutorch/models/image_segmentation/ImageSegmentation.h>
@@ -42,6 +43,11 @@ void RnExecutorchInstaller::injectJSIBindings(
4243
*jsiRuntime, "loadExecutorchModule",
4344
RnExecutorchInstaller::loadModel<BaseModel>(jsiRuntime, jsCallInvoker,
4445
"loadExecutorchModule"));
46+
47+
jsiRuntime->global().setProperty(
48+
*jsiRuntime, "loadTokenizerModule",
49+
RnExecutorchInstaller::loadModel<TokenizerModule>(
50+
jsiRuntime, jsCallInvoker, "loadTokenizerModule"));
4551
}
4652

4753
} // namespace rnexecutorch

packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ REGISTER_CONSTRUCTOR(ObjectDetection, std::string,
2828
std::shared_ptr<react::CallInvoker>);
2929
REGISTER_CONSTRUCTOR(BaseModel, std::string,
3030
std::shared_ptr<react::CallInvoker>);
31+
REGISTER_CONSTRUCTOR(TokenizerModule, std::string,
32+
std::shared_ptr<react::CallInvoker>);
3133

3234
using namespace facebook;
3335

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include "TokenizerModule.h"
2+
#include <executorch/extension/module/module.h>
3+
#include <filesystem>
4+
#include <rnexecutorch/Log.h>
5+
#include <rnexecutorch/data_processing/FileUtils.h>
6+
7+
namespace rnexecutorch {
8+
using namespace facebook;
9+
10+
TokenizerModule::TokenizerModule(
11+
std::string source, std::shared_ptr<react::CallInvoker> callInvoker)
12+
: memorySizeLowerBound(std::filesystem::file_size(source)),
13+
tokenizer(tokenizers::Tokenizer::FromBlobJSON(
14+
fileutils::loadBytesFromFile(source))) {}
15+
16+
void TokenizerModule::ensureTokenizerLoaded(
17+
const std::string &methodName) const {
18+
if (!tokenizer) {
19+
throw std::runtime_error(
20+
methodName + " function was called on an uninitialized tokenizer!");
21+
}
22+
}
23+
24+
std::vector<int32_t> TokenizerModule::encode(std::string s) const {
25+
ensureTokenizerLoaded("encode");
26+
return tokenizer->Encode(s);
27+
}
28+
29+
std::string TokenizerModule::decode(std::vector<int32_t> vec,
30+
bool skipSpecialTokens) const {
31+
ensureTokenizerLoaded("decode");
32+
return tokenizer->Decode(vec, skipSpecialTokens);
33+
}
34+
35+
size_t TokenizerModule::getVocabSize() const {
36+
ensureTokenizerLoaded("getVocabSize");
37+
return tokenizer->GetVocabSize();
38+
}
39+
40+
std::string TokenizerModule::idToToken(int32_t tokenId) const {
41+
ensureTokenizerLoaded("idToToken");
42+
return tokenizer->IdToToken(tokenId);
43+
}
44+
45+
int32_t TokenizerModule::tokenToId(std::string token) const {
46+
ensureTokenizerLoaded("tokenToId");
47+
return tokenizer->TokenToId(token);
48+
}
49+
std::size_t TokenizerModule::getMemoryLowerBound() const noexcept {
50+
return memorySizeLowerBound;
51+
}
52+
} // namespace rnexecutorch
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#pragma once
2+
3+
#include <ReactCommon/CallInvoker.h>
4+
#include <string>
5+
#include <tokenizers-cpp/tokenizers_cpp.h>
6+
7+
namespace rnexecutorch {
8+
using namespace facebook;
9+
10+
class TokenizerModule {
11+
public:
12+
explicit TokenizerModule(std::string source,
13+
std::shared_ptr<react::CallInvoker> callInvoker);
14+
std::vector<int32_t> encode(std::string s) const;
15+
std::string decode(std::vector<int32_t> vec, bool skipSpecialTokens) const;
16+
std::string idToToken(int32_t tokenId) const;
17+
int32_t tokenToId(std::string token) const;
18+
std::size_t getVocabSize() const;
19+
std::size_t getMemoryLowerBound() const noexcept;
20+
21+
private:
22+
void ensureTokenizerLoaded(const std::string &methodName) const;
23+
std::unique_ptr<tokenizers::Tokenizer> tokenizer;
24+
const std::size_t memorySizeLowerBound{0};
25+
};
26+
} // namespace rnexecutorch

packages/react-native-executorch/common/rnexecutorch/data_processing/FileUtils.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#pragma once
22

33
#include <chrono>
4+
#include <filesystem>
5+
#include <fstream>
46
#include <string>
57

68
namespace rnexecutorch::fileutils {
@@ -11,4 +13,18 @@ inline std::string getTimeID() {
1113
.count());
1214
}
1315

/// Reads the entire file at `path` into a string (binary-safe).
/// @throws std::runtime_error if the file cannot be opened, its size cannot
///         be determined, or the read comes up short.
inline std::string loadBytesFromFile(const std::string &path) {
  std::ifstream fs(path, std::ios::in | std::ios::binary);
  if (fs.fail()) {
    throw std::runtime_error("Failed to open tokenizer file");
  }
  fs.seekg(0, std::ios::end);
  const std::streampos end = fs.tellg();
  // tellg() reports -1 on failure; guard before converting to size_t.
  if (end < 0) {
    throw std::runtime_error("Failed to determine tokenizer file size");
  }
  fs.seekg(0, std::ios::beg);
  std::string data;
  data.resize(static_cast<size_t>(end));
  fs.read(data.data(), static_cast<std::streamsize>(data.size()));
  // A short read would otherwise silently return a zero-padded buffer.
  if (static_cast<size_t>(fs.gcount()) != data.size()) {
    throw std::runtime_error("Failed to read tokenizer file");
  }
  return data;
}
1430
} // namespace rnexecutorch::fileutils

packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,21 @@ inline std::string getValue<std::string>(const jsi::Value &val,
4141
return val.getString(runtime).utf8(runtime);
4242
}
4343

44+
template <>
45+
inline std::vector<int32_t>
46+
getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
47+
jsi::Array array = val.asObject(runtime).asArray(runtime);
48+
size_t length = array.size(runtime);
49+
std::vector<int32_t> result;
50+
result.reserve(length);
51+
52+
for (size_t i = 0; i < length; ++i) {
53+
jsi::Value element = array.getValueAtIndex(runtime, i);
54+
result.push_back(getValue<int32_t>(element, runtime));
55+
}
56+
return result;
57+
}
58+
4459
template <>
4560
inline JSTensorViewIn getValue<JSTensorViewIn>(const jsi::Value &val,
4661
jsi::Runtime &runtime) {
@@ -182,20 +197,18 @@ getJsiValue(const std::vector<std::shared_ptr<OwningArrayBuffer>> &vec,
182197
return jsi::Value(runtime, array);
183198
}
184199

185-
inline jsi::Value
186-
getJsiValue(const std::vector<std::shared_ptr<JSTensorViewOut>> &vec,
187-
jsi::Runtime &runtime) {
200+
inline jsi::Value getJsiValue(const std::vector<JSTensorViewOut> &vec,
201+
jsi::Runtime &runtime) {
188202
jsi::Array array(runtime, vec.size());
189203
for (size_t i = 0; i < vec.size(); i++) {
190204
jsi::Object tensorObj(runtime);
191205

192-
tensorObj.setProperty(runtime, "sizes",
193-
getJsiValue(vec[i]->sizes, runtime));
206+
tensorObj.setProperty(runtime, "sizes", getJsiValue(vec[i].sizes, runtime));
194207

195208
tensorObj.setProperty(runtime, "scalarType",
196-
jsi::Value(static_cast<int>(vec[i]->scalarType)));
209+
jsi::Value(static_cast<int>(vec[i].scalarType)));
197210

198-
jsi::ArrayBuffer arrayBuffer(runtime, vec[i]->dataPtr);
211+
jsi::ArrayBuffer arrayBuffer(runtime, vec[i].dataPtr);
199212
tensorObj.setProperty(runtime, "dataPtr", arrayBuffer);
200213

201214
array.setValueAtIndex(runtime, i, tensorObj);

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
#include <ReactCommon/CallInvoker.h>
99

10-
#include <rnexecutorch/Log.h>
10+
#include <rnexecutorch/TokenizerModule.h>
1111
#include <rnexecutorch/host_objects/JSTensorViewOut.h>
1212
#include <rnexecutorch/host_objects/JsiConversions.h>
1313
#include <rnexecutorch/jsi/JsiHostObject.h>
@@ -45,6 +45,31 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
4545
promiseHostFunction<&Model::generate>,
4646
"generate"));
4747
}
48+
49+
if constexpr (meta::HasEncode<Model>) {
50+
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
51+
promiseHostFunction<&Model::encode>,
52+
"encode"));
53+
}
54+
55+
if constexpr (meta::SameAs<Model, TokenizerModule>) {
56+
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
57+
promiseHostFunction<&Model::encode>,
58+
"encode"));
59+
60+
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
61+
promiseHostFunction<&Model::decode>,
62+
"decode"));
63+
addFunctions(JSI_EXPORT_FUNCTION(
64+
ModelHostObject<Model>, promiseHostFunction<&Model::getVocabSize>,
65+
"getVocabSize"));
66+
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
67+
promiseHostFunction<&Model::idToToken>,
68+
"idToToken"));
69+
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
70+
promiseHostFunction<&Model::tokenToId>,
71+
"tokenToId"));
72+
}
4873
}
4974

5075
// A generic host function that resolves a promise with a result of a
@@ -76,8 +101,8 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
76101
std::thread([this, promise,
77102
argsConverted = std::move(argsConverted)]() {
78103
try {
79-
auto result =
80-
std::apply(std::bind_front(FnPtr, model), argsConverted);
104+
auto result = std::apply(std::bind_front(FnPtr, model),
105+
std::move(argsConverted));
81106
// The result is copied. It should either be quickly copiable,
82107
// or passed with a shared_ptr.
83108
callInvoker->invokeAsync([promise,

packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) {
1414
return sizeof...(Types);
1515
}
1616

17+
// Const-qualified counterpart of getArgumentCount: yields the arity of a
// const member function at compile time. The pointer is unnamed since only
// its type matters.
template <typename Model, typename R, typename... Types>
constexpr std::size_t getArgumentCount(R (Model::*)(Types...) const) {
  return sizeof...(Types);
}
21+
1722
template <typename... Types, std::size_t... I>
1823
std::tuple<Types...> fillTupleFromArgs(std::index_sequence<I...>,
1924
const jsi::Value *args,
@@ -34,4 +39,12 @@ std::tuple<Types...> createArgsTupleFromJsi(R (Model::*f)(Types...),
3439
return fillTupleFromArgs<Types...>(std::index_sequence_for<Types...>{}, args,
3540
runtime);
3641
}
42+
43+
template <typename Model, typename R, typename... Types>
44+
std::tuple<Types...> createArgsTupleFromJsi(R (Model::*f)(Types...) const,
45+
const jsi::Value *args,
46+
jsi::Runtime &runtime) {
47+
return fillTupleFromArgs<Types...>(std::index_sequence_for<Types...>{}, args,
48+
runtime);
49+
}
3750
} // namespace rnexecutorch::meta

packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,24 @@ namespace rnexecutorch::meta {
88
template <typename T, typename Base>
99
concept DerivedFromOrSameAs = std::is_base_of_v<Base, T>;
1010

11+
template <typename T, typename Base>
12+
concept SameAs = std::is_same_v<Base, T>;
13+
1114
template <typename T>
1215
concept HasGenerate = requires(T t) {
1316
{ &T::generate };
1417
};
1518

19+
template <typename T>
20+
concept HasEncode = requires(T t) {
21+
{ &T::encode };
22+
};
23+
24+
template <typename T>
25+
concept HasDecode = requires(T t) {
26+
{ &T::decode };
27+
};
28+
1629
template <typename T>
1730
concept IsNumeric = std::is_arithmetic_v<T>;
1831

packages/react-native-executorch/common/rnexecutorch/models/BaseModel.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ BaseModel::getAllInputShapes(std::string methodName) {
8383
return output;
8484
}
8585

86-
std::vector<std::shared_ptr<JSTensorViewOut>>
86+
std::vector<JSTensorViewOut>
8787
BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
8888
if (!module) {
8989
throw std::runtime_error("Model not loaded: Cannot perform forward pass");
@@ -114,7 +114,7 @@ BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
114114
}
115115

116116
auto &outputs = result.get();
117-
std::vector<std::shared_ptr<JSTensorViewOut>> output;
117+
std::vector<JSTensorViewOut> output;
118118
output.reserve(outputs.size());
119119

120120
// Convert ET outputs to a vector of JSTensorViewOut which are later
@@ -125,8 +125,7 @@ BaseModel::forwardJS(const std::vector<JSTensorViewIn> tensorViewVec) {
125125
size_t bufferSize = outputTensor.numel() * outputTensor.element_size();
126126
auto buffer = std::make_shared<OwningArrayBuffer>(bufferSize);
127127
std::memcpy(buffer->data(), outputTensor.const_data_ptr(), bufferSize);
128-
auto jsTensor = std::make_shared<JSTensorViewOut>(
129-
sizes, outputTensor.scalar_type(), buffer);
128+
auto jsTensor = JSTensorViewOut(sizes, outputTensor.scalar_type(), buffer);
130129
output.emplace_back(jsTensor);
131130
}
132131
return output;

0 commit comments

Comments
 (0)