diff --git a/packages/react-native-executorch/android/CMakeLists.txt b/packages/react-native-executorch/android/CMakeLists.txt
index 96164c49c6..d35311d7a4 100644
--- a/packages/react-native-executorch/android/CMakeLists.txt
+++ b/packages/react-native-executorch/android/CMakeLists.txt
@@ -14,4 +14,28 @@ set(COMMON_CPP_DIR "${CMAKE_SOURCE_DIR}/../common")
 set(LIBS_DIR "${CMAKE_SOURCE_DIR}/../third-party/android/libs")
 set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../third-party/include")
 
+# FIXME: The commented-out block below is an unfinished attempt to link tokenizers-cpp
+# directly into react-native-executorch instead of linking it against ExecuTorch and
+# picking it up transitively. Revisit this when we bump the ExecuTorch runtime version.
+# The problem with linking tokenizers-cpp directly via a submodule is that we get
+# unresolved symbols for some Android logging libraries referenced by sentencepiece.
+
+# set(TOKENIZERS_CPP_DIR "${CMAKE_SOURCE_DIR}/../../../third-party/tokenizers-cpp")
+# add_subdirectory("${TOKENIZERS_CPP_DIR}" tokenizers-cpp)
+
+# # Link Android log library to sentencepiece targets
+# if(TARGET sentencepiece-static)
+#   target_link_libraries(sentencepiece-static INTERFACE log)
+# endif()
+# if(TARGET sentencepiece_train-static)
+#   target_link_libraries(sentencepiece_train-static INTERFACE log)
+# endif()
+
+# # Link log library to sentencepiece executables
+# foreach(exe spm_encode spm_decode spm_normalize spm_train spm_export_vocab)
+#   if(TARGET ${exe})
+#     target_link_libraries(${exe} log)
+#   endif()
+# endforeach()
+
 add_subdirectory("${ANDROID_CPP_DIR}")
\ No newline at end of file
diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt
deleted file mode 100644
index 04205ddcca..0000000000
--- a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt
+++ /dev/null
@@ -1,63 +0,0 @@
-package com.swmansion.rnexecutorch
-
-import android.util.Log
-import com.facebook.react.bridge.Promise
-import com.facebook.react.bridge.ReactApplicationContext
-import org.pytorch.executorch.extension.llm.LlmCallback
-import org.pytorch.executorch.extension.llm.LlmModule
-
-class LLM(
-  reactContext: ReactApplicationContext,
-) : NativeLLMSpec(reactContext),
-  LlmCallback {
-  private var llmModule: LlmModule? = null
-
-  override fun getName(): String = NAME
-
-  override fun initialize() {
-    super.initialize()
-  }
-
-  override fun onResult(result: String) {
-    emitOnToken(result)
-  }
-
-  override fun onStats(tps: Float) {
-    Log.d("rn_executorch", "TPS: $tps")
-  }
-
-  override fun loadLLM(
-    modelSource: String,
-    tokenizerSource: String,
-    promise: Promise,
-  ) {
-    try {
-      llmModule = LlmModule(modelSource, tokenizerSource, 0.7f)
-      promise.resolve("Model loaded successfully")
-    } catch (e: Exception) {
-      promise.reject("Model loading failed", e.message)
-    }
-  }
-
-  override fun forward(
-    input: String,
-    promise: Promise,
-  ) {
-    Thread {
-      llmModule!!.generate(input, this)
-      promise.resolve("Inference completed successfully")
-    }.start()
-  }
-
-  override fun interrupt() {
-    llmModule!!.stop()
-  }
-
-  override fun releaseResources() {
-    llmModule = null
-  }
-
-  companion object {
-    const val NAME = "LLM"
-  }
-}
diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt
index bcc8bed198..2deade1291 100644
--- a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt
+++ b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt
@@ -14,9 +14,7 @@ class RnExecutorchPackage : TurboReactPackage() {
     name: String,
     reactContext: ReactApplicationContext,
   ): NativeModule? =
-    if (name == LLM.NAME) {
-      LLM(reactContext)
-    } else if (name == SpeechToText.NAME) {
+    if (name == SpeechToText.NAME) {
       SpeechToText(reactContext)
     } else if (name == OCR.NAME) {
       OCR(reactContext)
@@ -31,16 +29,6 @@ class RnExecutorchPackage : TurboReactPackage() {
   override fun getReactModuleInfoProvider(): ReactModuleInfoProvider =
     ReactModuleInfoProvider {
       val moduleInfos: MutableMap<String, ReactModuleInfo> = HashMap()
-      moduleInfos[LLM.NAME] =
-        ReactModuleInfo(
-          LLM.NAME,
-          LLM.NAME,
-          false, // canOverrideExistingModule
-          false, // needsEagerInit
-          true, // hasConstants
-          false, // isCxxModule
-          true,
-        )
       moduleInfos[SpeechToText.NAME] =
         ReactModuleInfo(
           SpeechToText.NAME,
diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
index 84aa984003..2fc736ae88 100644
--- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp
@@ -5,6 +5,7 @@
 #include <rnexecutorch/models/classification/Classification.h>
 #include <rnexecutorch/models/image_embeddings/ImageEmbeddings.h>
 #include <rnexecutorch/models/image_segmentation/ImageSegmentation.h>
+#include <rnexecutorch/models/llm/LLM.h>
 #include <rnexecutorch/models/object_detection/ObjectDetection.h>
 #include <rnexecutorch/models/style_transfer/StyleTransfer.h>
 #include <rnexecutorch/models/text_embeddings/TextEmbeddings.h>
@@ -55,10 +56,15 @@ void RnExecutorchInstaller::injectJSIBindings(
       *jsiRuntime, "loadImageEmbeddings",
       RnExecutorchInstaller::loadModel<ImageEmbeddings>(
           jsiRuntime, jsCallInvoker, "loadImageEmbeddings"));
+
   jsiRuntime->global().setProperty(
       *jsiRuntime, "loadTextEmbeddings",
       RnExecutorchInstaller::loadModel<TextEmbeddings>(
           jsiRuntime, jsCallInvoker, "loadTextEmbeddings"));
+
+  jsiRuntime->global().setProperty(*jsiRuntime, "loadLLM",
+                                   RnExecutorchInstaller::loadModel<LLM>(
+                                       jsiRuntime, jsCallInvoker, "loadLLM"));
 }
 
 } // namespace rnexecutorch
\ No newline at end of file
diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h
index 69fb579b53..743129fece 100644
--- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h
+++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h
@@ -30,9 +30,12 @@ REGISTER_CONSTRUCTOR(BaseModel, std::string,
                      std::shared_ptr<react::CallInvoker>);
 REGISTER_CONSTRUCTOR(TokenizerModule, std::string,
                      std::shared_ptr<react::CallInvoker>);
-REGISTER_CONSTRUCTOR(ImageEmbeddings, std::string, std::shared_ptr<react::CallInvoker>);
+REGISTER_CONSTRUCTOR(ImageEmbeddings, std::string,
+                     std::shared_ptr<react::CallInvoker>);
 REGISTER_CONSTRUCTOR(TextEmbeddings, std::string, std::string,
                      std::shared_ptr<react::CallInvoker>);
+REGISTER_CONSTRUCTOR(LLM, std::string, std::string,
+                     std::shared_ptr<react::CallInvoker>);
 
 using namespace facebook;
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
index 1773042925..4bbfc4e389 100644
--- a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
@@ -5,7 +5,6 @@
 
 #include <ada/ada.h>
 
-#include <rnexecutorch/Log.h>
 #include <rnexecutorch/RnExecutorchInstaller.h>
 #include <rnexecutorch/data_processing/FileUtils.h>
 #include <rnexecutorch/data_processing/base64.h>
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
index 089044e4ec..2392a861f6 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
@@ -41,6 +41,14 @@ inline std::string getValue<std::string>(const jsi::Value &val,
   return val.getString(runtime).utf8(runtime);
 }
 
+// Converts a JS value into a shared handle to the jsi::Function it holds.
+template <>
+inline std::shared_ptr<jsi::Function> getValue<std::shared_ptr<jsi::Function>>(
+    const jsi::Value &val, jsi::Runtime &runtime) {
+  jsi::Object holder = val.asObject(runtime);
+  return std::make_shared<jsi::Function>(holder.asFunction(runtime));
+}
+
 template <>
 inline std::vector<int32_t>
 getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
index fa3ec3bab7..ca50fa2a45 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
@@ -3,6 +3,7 @@
 #include <cstdio>
 #include <string>
 #include <tuple>
+#include <type_traits>
 #include <vector>
 
 #include <ReactCommon/CallInvoker.h>
@@ -15,6 +16,7 @@
 #include <rnexecutorch/metaprogramming/FunctionHelpers.h>
 #include <rnexecutorch/metaprogramming/TypeConcepts.h>
 #include <rnexecutorch/models/BaseModel.h>
+#include <rnexecutorch/models/llm/LLM.h>
 
 namespace rnexecutorch {
 
@@ -70,6 +72,60 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
                                        promiseHostFunction<&Model::tokenToId>,
                                        "tokenToId"));
     }
+
+    if constexpr (meta::SameAs<Model, LLM>) {
+      addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                                       promiseHostFunction<&Model::generate>,
+                                       "generate"));
+
+      addFunctions(JSI_EXPORT_FUNCTION(
+          ModelHostObject<Model>, synchronousHostFunction<&Model::interrupt>,
+          "interrupt"));
+
+      addFunctions(
+          JSI_EXPORT_FUNCTION(ModelHostObject<Model>, unload, "unload"));
+    }
+  }
+
+  // Generic synchronous host function: validates the argument count, converts
+  // the JSI arguments, calls FnPtr on `model` and converts the result back.
+  template <auto FnPtr> JSI_HOST_FUNCTION(synchronousHostFunction) {
+    constexpr std::size_t functionArgCount = meta::getArgumentCount(FnPtr);
+    if (functionArgCount != count) {
+      char errorMessage[100];
+      std::snprintf(errorMessage, sizeof(errorMessage),
+                    "Argument count mismatch, was expecting: %zu but got: %zu",
+                    functionArgCount, count);
+      throw jsi::JSError(runtime, errorMessage);
+    }
+
+    try {
+      auto argsConverted = meta::createArgsTupleFromJsi(FnPtr, args, runtime);
+
+      if constexpr (std::is_void_v<decltype(std::apply(
+                        std::bind_front(FnPtr, model), argsConverted))>) {
+        // Void return type: call for its effects and hand back undefined.
+        std::apply(std::bind_front(FnPtr, model), std::move(argsConverted));
+        return jsi::Value::undefined();
+      } else {
+        // Non-void return type: convert the returned value to a jsi::Value.
+        auto result =
+            std::apply(std::bind_front(FnPtr, model), std::move(argsConverted));
+        return jsiconversion::getJsiValue(std::move(result), runtime);
+      }
+    } catch (const std::runtime_error &e) {
+      // Redundant on paper: std::runtime_error derives from std::exception,
+      // so the next handler should already cover it. It is kept because
+      // React Native has broken RTTI, which breaks proper exception type
+      // checking. Remove this handler once our React Native version
+      // includes the following change:
+      // https://github.com/facebook/react-native/commit/3132cc88dd46f95898a756456bebeeb6c248f20e
+      throw jsi::JSError(runtime, e.what());
+    } catch (const std::exception &e) {
+      throw jsi::JSError(runtime, e.what());
+    } catch (...) {
+      throw jsi::JSError(runtime, "Unknown error in synchronous function");
+    }
   }
 
   // A generic host function that resolves a promise with a result of a
@@ -101,15 +157,28 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
             std::thread([this, promise,
                          argsConverted = std::move(argsConverted)]() {
               try {
-                auto result = std::apply(std::bind_front(FnPtr, model),
-                                         std::move(argsConverted));
-                // The result is copied. It should either be quickly copiable,
-                // or passed with a shared_ptr.
-                callInvoker->invokeAsync([promise,
-                                          result](jsi::Runtime &runtime) {
-                  promise->resolve(
-                      jsiconversion::getJsiValue(std::move(result), runtime));
-                });
+                if constexpr (std::is_void_v<decltype(std::apply(
+                                  std::bind_front(FnPtr, model),
+                                  argsConverted))>) {
+                  // For void functions, just call the function and resolve with
+                  // undefined
+                  std::apply(std::bind_front(FnPtr, model),
+                             std::move(argsConverted));
+                  callInvoker->invokeAsync([promise](jsi::Runtime &runtime) {
+                    promise->resolve(jsi::Value::undefined());
+                  });
+                } else {
+                  // For non-void functions, capture the result and convert it
+                  auto result = std::apply(std::bind_front(FnPtr, model),
+                                           std::move(argsConverted));
+                  // The result is copied. It should either be quickly copiable,
+                  // or passed with a shared_ptr.
+                  callInvoker->invokeAsync([promise,
+                                            result](jsi::Runtime &runtime) {
+                    promise->resolve(
+                        jsiconversion::getJsiValue(std::move(result), runtime));
+                  });
+                }
               } catch (const std::runtime_error &e) {
                 // This catch should be merged with the next two
                 // (std::runtime_error and jsi::JSError inherits from
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
new file mode 100644
index 0000000000..43dc8b5941
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
@@ -0,0 +1,82 @@
+#include "LLM.h"
+
+#include <filesystem>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+#include <executorch/extension/tensor/tensor.h>
+
+namespace rnexecutorch {
+using namespace facebook;
+using executorch::extension::TensorPtr;
+using executorch::runtime::Error;
+
+/// Creates the runner for a model/tokenizer pair and loads it eagerly.
+///
+/// @param modelSource     Filesystem path of the model file.
+/// @param tokenizerSource Filesystem path of the tokenizer file.
+/// @param callInvoker     Invoker used by generate() to reach the JS callback.
+/// @throws std::runtime_error if the runner reports a load error.
+/// @throws std::filesystem::filesystem_error if a file size cannot be read.
+LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
+         std::shared_ptr<react::CallInvoker> callInvoker)
+    : runner(std::make_unique<example::Runner>(modelSource, tokenizerSource)),
+      callInvoker(std::move(callInvoker)) {
+  const auto loadResult = runner->load();
+  if (loadResult != Error::Ok) {
+    throw std::runtime_error("Failed to load LLM runner, error code: " +
+                             std::to_string(static_cast<int>(loadResult)));
+  }
+  // The lower bound is the combined on-disk size of the two input files.
+  memorySizeLowerBound =
+      std::filesystem::file_size(std::filesystem::path(modelSource)) +
+      std::filesystem::file_size(std::filesystem::path(tokenizerSource));
+}
+
+/// Generates text for `input`, passing each produced token to `callback`.
+///
+/// @param input    Prompt handed to the runner unchanged.
+/// @param callback JS function called with each token string the runner emits.
+/// @throws std::runtime_error if the runner is absent or not loaded, or if
+///         the runner returns an error code.
+void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
+  if (!runner || !runner->is_loaded()) {
+    throw std::runtime_error("Runner is not loaded");
+  }
+
+  // Create a native callback that will invoke the JS callback on the JS thread
+  auto nativeCallback = [this, callback](const std::string &token) {
+    callInvoker->invokeAsync([callback, token](jsi::Runtime &runtime) {
+      callback->call(runtime, jsi::String::createFromUtf8(runtime, token));
+    });
+  };
+
+  const auto error = runner->generate(input, nativeCallback, {}, false);
+  if (error != Error::Ok) {
+    throw std::runtime_error("Failed to generate text, error code: " +
+                             std::to_string(static_cast<int>(error)));
+  }
+}
+
+/// Asks the runner to stop via runner->stop().
+///
+/// @throws std::runtime_error if the runner is absent or not loaded.
+void LLM::interrupt() {
+  if (!runner || !runner->is_loaded()) {
+    throw std::runtime_error("Can't interrupt a model that's not loaded!");
+  }
+  runner->stop();
+}
+
+/// Returns the combined model and tokenizer file size computed at construction.
+std::size_t LLM::getMemoryLowerBound() const noexcept {
+  return memorySizeLowerBound;
+}
+
+/// Destroys the runner; generate() and interrupt() throw from then on.
+// NOTE(review): no locking is visible here — confirm this is never called
+// while generate() is still running on another thread.
+void LLM::unload() noexcept { runner.reset(); }
+
+} // namespace rnexecutorch
\ No newline at end of file
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h
new file mode 100644
index 0000000000..f946409d95
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+#include <ReactCommon/CallInvoker.h>
+#include <jsi/jsi.h>
+#include <runner/runner.h>
+
+namespace rnexecutorch {
+using namespace facebook;
+
+/// Text-generation model exposed to JS; wraps an example::Runner.
+class LLM {
+public:
+  /// Builds and loads the runner from the two file paths; throws on failure.
+  explicit LLM(const std::string &modelSource,
+               const std::string &tokenizerSource,
+               std::shared_ptr<react::CallInvoker> callInvoker);
+
+  /// Generates text for `input`, reporting tokens through `callback`.
+  void generate(std::string input, std::shared_ptr<jsi::Function> callback);
+  /// Calls stop() on the runner; throws if the runner is not loaded.
+  void interrupt();
+  /// Destroys the underlying runner.
+  void unload() noexcept;
+  /// Returns the combined size of the model and tokenizer files.
+  std::size_t getMemoryLowerBound() const noexcept;
+
+private:
+  // Declaration order is also the member initialization order.
+  std::size_t memorySizeLowerBound{0};
+  std::unique_ptr<example::Runner> runner;
+  std::shared_ptr<react::CallInvoker> callInvoker;
+};
+} // namespace rnexecutorch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h b/packages/react-native-executorch/common/runner/irunner.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h
rename to packages/react-native-executorch/common/runner/irunner.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp b/packages/react-native-executorch/common/runner/runner.cpp
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp
rename to packages/react-native-executorch/common/runner/runner.cpp
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h b/packages/react-native-executorch/common/runner/runner.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h
rename to packages/react-native-executorch/common/runner/runner.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp b/packages/react-native-executorch/common/runner/sampler.cpp
similarity index 97%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp
rename to packages/react-native-executorch/common/runner/sampler.cpp
index 7ba8152889..e156b9f70e 100644
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp
+++ b/packages/react-native-executorch/common/runner/sampler.cpp
@@ -184,9 +184,10 @@ template <typename T> int32_t Sampler::sample(T *logits) {
 }
 
 template int32_t Sampler::sample<float>(float *logits);
-template int32_t Sampler::sample<exec_aten::Half>(exec_aten::Half *logits);
 template int32_t
-Sampler::sample<exec_aten::BFloat16>(exec_aten::BFloat16 *logits);
+Sampler::sample<executorch::aten::Half>(executorch::aten::Half *logits);
+template int32_t
+Sampler::sample<executorch::aten::BFloat16>(executorch::aten::BFloat16 *logits);
 
 } // namespace llm
 } // namespace extension
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h b/packages/react-native-executorch/common/runner/sampler.h
similarity index 91%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h
rename to packages/react-native-executorch/common/runner/sampler.h
index 0b29ca9fcb..03d3d09a01 100644
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h
+++ b/packages/react-native-executorch/common/runner/sampler.h
@@ -19,18 +19,19 @@
 #endif
 
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/platform/compiler.h>
 
 namespace executorch {
 namespace extension {
 namespace llm {
 // A simple llama2 sampler.
 
-template <typename T> struct ProbIndex {
+template <typename T> struct ET_EXPERIMENTAL ProbIndex {
   T prob;
   int32_t index;
 }; // struct used when sorting probabilities during top-p sampling
 
-class Sampler {
+class ET_EXPERIMENTAL Sampler {
 public:
   Sampler(int32_t vocab_size, float temperature, float topp,
           unsigned long long rng_seed);
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h b/packages/react-native-executorch/common/runner/stats.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h
rename to packages/react-native-executorch/common/runner/stats.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp b/packages/react-native-executorch/common/runner/text_decoder_runner.cpp
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp
rename to packages/react-native-executorch/common/runner/text_decoder_runner.cpp
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h b/packages/react-native-executorch/common/runner/text_decoder_runner.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h
rename to packages/react-native-executorch/common/runner/text_decoder_runner.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp b/packages/react-native-executorch/common/runner/text_prefiller.cpp
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp
rename to packages/react-native-executorch/common/runner/text_prefiller.cpp
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h b/packages/react-native-executorch/common/runner/text_prefiller.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h
rename to packages/react-native-executorch/common/runner/text_prefiller.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h b/packages/react-native-executorch/common/runner/text_token_generator.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h
rename to packages/react-native-executorch/common/runner/text_token_generator.h
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h b/packages/react-native-executorch/common/runner/util.h
similarity index 100%
rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h
rename to packages/react-native-executorch/common/runner/util.h
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/Info.plist b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/Info.plist
index aaba93b395..6a6c556899 100644
--- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/Info.plist
+++ b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/Info.plist
@@ -8,7 +8,7 @@
 			<key>BinaryPath</key>
 			<string>ExecutorchLib.framework/ExecutorchLib</string>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64-simulator</string>
+			<string>ios-arm64</string>
 			<key>LibraryPath</key>
 			<string>ExecutorchLib.framework</string>
 			<key>SupportedArchitectures</key>
@@ -17,14 +17,12 @@
 			</array>
 			<key>SupportedPlatform</key>
 			<string>ios</string>
-			<key>SupportedPlatformVariant</key>
-			<string>simulator</string>
 		</dict>
 		<dict>
 			<key>BinaryPath</key>
 			<string>ExecutorchLib.framework/ExecutorchLib</string>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64</string>
+			<string>ios-arm64-simulator</string>
 			<key>LibraryPath</key>
 			<string>ExecutorchLib.framework</string>
 			<key>SupportedArchitectures</key>
@@ -33,6 +31,8 @@
 			</array>
 			<key>SupportedPlatform</key>
 			<string>ios</string>
+			<key>SupportedPlatformVariant</key>
+			<string>simulator</string>
 		</dict>
 	</array>
 	<key>CFBundlePackageType</key>
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib
index 999fc322ef..3acc9408d9 100755
Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib differ
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h
deleted file mode 100644
index 4332cf811d..0000000000
--- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#import <Foundation/Foundation.h>
-
-@interface HuggingFaceTokenizer : NSObject
-
-- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
-- (NSArray<NSNumber *> *)encode:(NSString *)text;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
-    skipSpecialTokens:(BOOL)skipSpecialTokens;
-- (NSUInteger)getVocabSize;
-- (NSString *)idToToken:(NSInteger)tokenId;
-- (NSInteger)tokenToId:(NSString *)token;
-
-@end
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h
deleted file mode 100644
index d8638cfa6a..0000000000
--- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#import <UIKit/UIKit.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-
-NS_SWIFT_NAME(Runner)
-@interface LLaMARunner : NSObject
-
-- (instancetype)initWithModelPath:(NSString *)filePath
-                    tokenizerPath:(NSString *)tokenizerPath;
-- (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError **)error;
-- (BOOL)generate:(NSString *)prompt
-    withTokenCallback:(nullable void (^)(NSString *))callback
-                error:(NSError **)error;
-- (void)stop;
-
-+ (instancetype)new NS_UNAVAILABLE;
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist
index c2ea3f7cff..9bd01dc468 100644
Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist differ
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib
index 4a824ff959..43df0d606e 100755
Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib differ
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h
deleted file mode 100644
index 4332cf811d..0000000000
--- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#import <Foundation/Foundation.h>
-
-@interface HuggingFaceTokenizer : NSObject
-
-- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
-- (NSArray<NSNumber *> *)encode:(NSString *)text;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
-    skipSpecialTokens:(BOOL)skipSpecialTokens;
-- (NSUInteger)getVocabSize;
-- (NSString *)idToToken:(NSInteger)tokenId;
-- (NSInteger)tokenToId:(NSString *)token;
-
-@end
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h
deleted file mode 100644
index d8638cfa6a..0000000000
--- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#import <UIKit/UIKit.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-
-NS_SWIFT_NAME(Runner)
-@interface LLaMARunner : NSObject
-
-- (instancetype)initWithModelPath:(NSString *)filePath
-                    tokenizerPath:(NSString *)tokenizerPath;
-- (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError **)error;
-- (BOOL)generate:(NSString *)prompt
-    withTokenCallback:(nullable void (^)(NSString *))callback
-                error:(NSError **)error;
-- (void)stop;
-
-+ (instancetype)new NS_UNAVAILABLE;
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist
index 9cb7cdb653..a1af60af6d 100644
Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist differ
diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.h b/packages/react-native-executorch/ios/RnExecutorch/LLM.h
deleted file mode 100644
index 5047919a48..0000000000
--- a/packages/react-native-executorch/ios/RnExecutorch/LLM.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#import <RnExecutorchSpec/RnExecutorchSpec.h>
-
-@interface LLM : NativeLLMSpecBase <NativeLLMSpec>
-
-@end
diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm b/packages/react-native-executorch/ios/RnExecutorch/LLM.mm
deleted file mode 100644
index 33971f755c..0000000000
--- a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm
+++ /dev/null
@@ -1,78 +0,0 @@
-#import "LLM.h"
-#import <ExecutorchLib/LLaMARunner.h>
-
-@implementation LLM {
-  LLaMARunner *runner;
-}
-
-- (instancetype)init {
-  self = [super init];
-
-  return self;
-}
-
-RCT_EXPORT_MODULE()
-
-- (void)onResult:(NSString *)token prompt:(NSString *)prompt {
-  if ([token isEqualToString:prompt]) {
-    return;
-  }
-
-  dispatch_async(dispatch_get_main_queue(), ^{
-    [self emitOnToken:token];
-  });
-}
-
-- (void)loadLLM:(NSString *)modelSource
-    tokenizerSource:(NSString *)tokenizerSource
-            resolve:(RCTPromiseResolveBlock)resolve
-             reject:(RCTPromiseRejectBlock)reject {
-  @try {
-    self->runner = [[LLaMARunner alloc] initWithModelPath:modelSource
-                                            tokenizerPath:tokenizerSource];
-
-    resolve(@"Model and tokenizer loaded successfully");
-    return;
-  } @catch (NSException *exception) {
-    [self releaseResources];
-    reject(@"Model or tokenizer loading failed", exception.reason, nil);
-    return;
-  }
-}
-
-- (void)forward:(NSString *)input
-        resolve:(RCTPromiseResolveBlock)resolve
-         reject:(RCTPromiseRejectBlock)reject {
-
-  dispatch_async(
-      dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
-        NSError *error = nil;
-        [self->runner generate:input
-             withTokenCallback:^(NSString *token) {
-               [self onResult:token prompt:input];
-             }
-                         error:&error];
-
-        if (error) {
-          reject(@"error_in_generation", error.localizedDescription, nil);
-          return;
-        }
-        resolve(@"Inference completed successfully");
-        return;
-      });
-}
-
-- (void)interrupt {
-  [self->runner stop];
-}
-
-- (void)releaseResources {
-  self->runner = nil;
-}
-
-- (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
-    (const facebook::react::ObjCTurboModule::InitParams &)params {
-  return std::make_shared<facebook::react::NativeLLMSpecJSI>(params);
-}
-
-@end
diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_mps_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_ios.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libexecutorch_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libexecutorch_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_custom_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_portable_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a
diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a
similarity index 100%
rename from packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a
rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a
diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a
new file mode 100644
index 0000000000..69cc738f00
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a differ
diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a
similarity index 74%
rename from packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_c.a
rename to packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a
index 873866fbcc..86280b1c5c 100644
Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_c.a and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a differ
diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a
new file mode 100644
index 0000000000..6c99b48d6f
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a differ
diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a
new file mode 100644
index 0000000000..f3aa9203d9
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a differ
diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a
new file mode 100644
index 0000000000..a24e87cedf
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a differ
diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a
new file mode 100644
index 0000000000..338db91c8b
Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a differ
diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec
index e6d2b2f7ea..90897f3817 100644
--- a/packages/react-native-executorch/react-native-executorch.podspec
+++ b/packages/react-native-executorch/react-native-executorch.podspec
@@ -13,7 +13,8 @@ Pod::Spec.new do |s|
   s.platforms    = { :ios => min_ios_version_supported }
   s.source       = { :git => "https://github.com/software-mansion/react-native-executorch.git", :tag => "#{s.version}" }
 
-  et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs', __dir__)
+  et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/executorch', __dir__)
+  tokenizers_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/tokenizers-cpp', __dir__)
 
   s.user_target_xcconfig = {
     "HEADER_SEARCH_PATHS" => "$(PODS_TARGET_SRCROOT)/third-party/include",
@@ -31,7 +32,10 @@ Pod::Spec.new do |s|
       "-force_load \"#{et_binaries_path}\"/libexecutorch_ios.a", 
       "-force_load \"#{et_binaries_path}\"/libkernels_custom_ios.a", 
       "-force_load \"#{et_binaries_path}\"/libkernels_optimized_ios.a", 
-      "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a"
+      "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a",
+      "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_cpp.a\"",
+      "\"#{tokenizers_binaries_path}/physical-arm64-release/libsentencepiece.a\"",
+      "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\""
     ].join(' '),
       
     "OTHER_LDFLAGS[sdk=iphonesimulator*][arch=*]" => [
@@ -47,7 +51,10 @@ Pod::Spec.new do |s|
       "-force_load \"#{et_binaries_path}\"/libexecutorch_simulator.a", 
       "-force_load \"#{et_binaries_path}\"/libkernels_custom_simulator.a", 
       "-force_load \"#{et_binaries_path}\"/libkernels_optimized_simulator.a", 
-      "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a"
+      "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a",
+      "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_cpp.a\"",
+      "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libsentencepiece.a\"",
+      "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\""
     ].join(' '),
 
     'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'x86_64',
diff --git a/packages/react-native-executorch/src/controllers/LLMController.ts b/packages/react-native-executorch/src/controllers/LLMController.ts
index 622081ad31..de32b33158 100644
--- a/packages/react-native-executorch/src/controllers/LLMController.ts
+++ b/packages/react-native-executorch/src/controllers/LLMController.ts
@@ -1,4 +1,3 @@
-import { EventSubscription } from 'react-native';
 import { ResourceSource } from '../types/common';
 import { ResourceFetcher } from '../utils/ResourceFetcher';
 import { ETError, getError } from '../Error';
@@ -12,15 +11,14 @@ import {
   SPECIAL_TOKENS,
   ToolsConfig,
 } from '../types/llm';
-import { LLMNativeModule } from '../native/RnExecutorchModules';
 import { parseToolCall } from '../utils/llm';
 
 export class LLMController {
-  private nativeModule: typeof LLMNativeModule;
+  private nativeModule: any;
   private chatConfig: ChatConfig = DEFAULT_CHAT_CONFIG;
   private toolsConfig: ToolsConfig | undefined;
   private tokenizerConfig: any;
-  private onToken: EventSubscription | null = null;
+  private onToken?: (token: string) => void;
   private _response = '';
   private _isReady = false;
   private _isGenerating = false;
@@ -77,8 +75,6 @@ export class LLMController {
     };
 
     this.onDownloadProgressCallback = onDownloadProgressCallback;
-
-    this.nativeModule = LLMNativeModule;
   }
 
   public get response() {
@@ -123,9 +119,10 @@ export class LLMController {
         this.onDownloadProgressCallback
       );
 
-      await this.nativeModule.loadLLM(modelFileUri, tokenizerFileUri);
+      // Create an LLM host object on load call
+      this.nativeModule = global.loadLLM(modelFileUri, tokenizerFileUri);
       this.isReadyCallback(true);
-      this.onToken = this.nativeModule.onToken((data: string) => {
+      this.onToken = (data: string) => {
         if (
           !data ||
           (SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig &&
@@ -138,7 +135,7 @@ export class LLMController {
 
         this.tokenCallback(data);
         this.responseCallback(this._response + data);
-      });
+      };
     } catch (e) {
       this.isReadyCallback(false);
       throw new Error(getError(e));
@@ -172,9 +169,8 @@ export class LLMController {
           'You cannot delete the model now. You need to interrupt first.'
       );
     }
-    this.onToken?.remove();
-    this.onToken = null;
-    this.nativeModule.releaseResources();
+    this.onToken = () => {};
+    this.nativeModule.unload();
     this.isReadyCallback(false);
     this.isGeneratingCallback(false);
   }
@@ -189,7 +185,7 @@ export class LLMController {
     try {
       this.responseCallback('');
       this.isGeneratingCallback(true);
-      await this.nativeModule.forward(input);
+      await this.nativeModule.generate(input, this.onToken);
     } catch (e) {
       throw new Error(getError(e));
     } finally {
diff --git a/packages/react-native-executorch/src/index.tsx b/packages/react-native-executorch/src/index.tsx
index 4386593e13..dec7bcda24 100644
--- a/packages/react-native-executorch/src/index.tsx
+++ b/packages/react-native-executorch/src/index.tsx
@@ -12,6 +12,7 @@ declare global {
   var loadTokenizerModule: (source: string) => any;
   var loadImageEmbeddings: (source: string) => any;
   var loadTextEmbeddings: (modelSource: string, tokenizerSource: string) => any;
+  var loadLLM: (modelSource: string, tokenizerSource: string) => any;
 }
 // eslint-disable no-var
 if (
@@ -22,7 +23,8 @@ if (
   global.loadObjectDetection == null ||
   global.loadTokenizerModule == null ||
   global.loadTextEmbeddings == null ||
-  global.loadImageEmbeddings == null
+  global.loadImageEmbeddings == null ||
+  global.loadLLM == null
 ) {
   if (!ETInstallerNativeModule) {
     throw new Error(
diff --git a/packages/react-native-executorch/src/native/NativeLLM.ts b/packages/react-native-executorch/src/native/NativeLLM.ts
deleted file mode 100644
index e89ba01f53..0000000000
--- a/packages/react-native-executorch/src/native/NativeLLM.ts
+++ /dev/null
@@ -1,14 +0,0 @@
-import type { TurboModule } from 'react-native';
-import { TurboModuleRegistry } from 'react-native';
-import type { EventEmitter } from 'react-native/Libraries/Types/CodegenTypes';
-
-export interface Spec extends TurboModule {
-  loadLLM(modelSource: string, tokenizerSource: string): Promise<string>;
-  forward(input: string): Promise<string>;
-  interrupt(): void;
-  releaseResources(): void;
-
-  readonly onToken: EventEmitter<string>;
-}
-
-export default TurboModuleRegistry.get<Spec>('LLM');
diff --git a/packages/react-native-executorch/src/native/RnExecutorchModules.ts b/packages/react-native-executorch/src/native/RnExecutorchModules.ts
index fa07d33793..7b702eaa7c 100644
--- a/packages/react-native-executorch/src/native/RnExecutorchModules.ts
+++ b/packages/react-native-executorch/src/native/RnExecutorchModules.ts
@@ -2,7 +2,6 @@ import { Platform } from 'react-native';
 import { Spec as OCRInterface } from './NativeOCR';
 import { Spec as VerticalOCRInterface } from './NativeVerticalOCR';
 import { Spec as SpeechToTextInterface } from './NativeSpeechToText';
-import { Spec as LLMInterface } from './NativeLLM';
 import { Spec as ETInstallerInterface } from './NativeETInstaller';
 
 const LINKING_ERROR =
@@ -24,9 +23,6 @@ function returnSpecOrThrowLinkingError(spec: any) {
       );
 }
 
-const LLMNativeModule: LLMInterface = returnSpecOrThrowLinkingError(
-  require('./NativeLLM').default
-);
 const SpeechToTextNativeModule: SpeechToTextInterface =
   returnSpecOrThrowLinkingError(require('./NativeSpeechToText').default);
 const OCRNativeModule: OCRInterface = returnSpecOrThrowLinkingError(
@@ -38,7 +34,6 @@ const ETInstallerNativeModule: ETInstallerInterface =
   returnSpecOrThrowLinkingError(require('./NativeETInstaller').default);
 
 export {
-  LLMNativeModule,
   SpeechToTextNativeModule,
   OCRNativeModule,
   VerticalOCRNativeModule,
diff --git a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_cpp.a b/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_cpp.a
deleted file mode 100644
index eb820ccd48..0000000000
Binary files a/packages/react-native-executorch/third-party/android/libs/tokenizers-cpp/libtokenizers_cpp.a and /dev/null differ
diff --git a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h b/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h
deleted file mode 100644
index 42a59e94e5..0000000000
--- a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*!
- *  Copyright (c) 2023 by Contributors
- * \file tokenizers_c.h
- * \brief C binding to tokenizers rust library
- */
-#ifndef TOKENIZERS_C_H_
-#define TOKENIZERS_C_H_
-
-// The C API
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef void *TokenizerHandle;
-
-typedef struct {
-  int *token_ids;
-  size_t len;
-} TokenizerEncodeResult;
-
-TokenizerHandle tokenizers_new_from_str(const char *json, size_t len);
-
-TokenizerHandle byte_level_bpe_tokenizers_new_from_str(
-    const char *vocab, size_t vocab_len, const char *merges, size_t merges_len,
-    const char *added_tokens, size_t added_tokens_len);
-
-void tokenizers_encode(TokenizerHandle handle, const char *data, size_t len,
-                       int add_special_token, TokenizerEncodeResult *result);
-
-void tokenizers_encode_batch(TokenizerHandle handle, const char **data,
-                             size_t *len, size_t num_seqs,
-                             int add_special_token,
-                             TokenizerEncodeResult *results);
-
-void tokenizers_free_encode_results(TokenizerEncodeResult *results,
-                                    size_t num_seqs);
-
-void tokenizers_decode(TokenizerHandle handle, const uint32_t *data, size_t len,
-                       int skip_special_token);
-
-void tokenizers_get_decode_str(TokenizerHandle handle, const char **data,
-                               size_t *len);
-
-void tokenizers_get_vocab_size(TokenizerHandle handle, size_t *size);
-
-void tokenizers_id_to_token(TokenizerHandle handle, uint32_t id,
-                            const char **data, size_t *len);
-
-// tokenizers_token_to_id stores -1 to *id if the token is not in the vocab
-void tokenizers_token_to_id(TokenizerHandle handle, const char *token,
-                            size_t len, int32_t *id);
-
-void tokenizers_free(TokenizerHandle handle);
-
-#ifdef __cplusplus
-}
-#endif
-#endif // TOKENIZERS_C_H_
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj
index acb87c8ffb..eedabc07e0 100644
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj
+++ b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj
@@ -21,37 +21,10 @@
 		5576B4B72CEF9709005027B7 /* ETModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 5576B4B62CEF9705005027B7 /* ETModel.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5576B4B82CEF970C005027B7 /* ETModel.mm */; };
 		558699BB2D8AD562004180E5 /* re2.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5576B49E2CEF76CC005027B7 /* re2.xcframework */; };
-		55A815F22D9D40680075A106 /* irunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55A815F12D9D40680075A106 /* irunner.h */; };
 		55DEEA382D05ABBB004422A3 /* InputType.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEA372D05ABB4004422A3 /* InputType.h */; };
-		55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */; };
-		55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C262CB90C7A004315B3 /* runner.cpp */; };
-		55EA2C402CB90C7A004315B3 /* runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C272CB90C7A004315B3 /* runner.h */; };
-		55EA2C412CB90C7A004315B3 /* stats.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C282CB90C7A004315B3 /* stats.h */; };
-		55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */; };
-		55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */; };
-		55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */; };
-		55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */; };
-		55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */; };
-		55EA2C472CB90C7A004315B3 /* util.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2E2CB90C7A004315B3 /* util.h */; };
-		55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C302CB90C7A004315B3 /* sampler.cpp */; };
-		55EA2C492CB90C7A004315B3 /* sampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C312CB90C7A004315B3 /* sampler.h */; };
-		55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */; };
-		55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C362CB90C7A004315B3 /* base64.h */; };
-		55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */; };
-		55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */; };
-		55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */; };
-		55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */; };
-		55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */; };
-		55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3C2CB90C7A004315B3 /* tiktoken.h */; };
-		55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */; };
-		55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		55EA2C572CB90E7D004315B3 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C562CB90E7D004315B3 /* Accelerate.framework */; };
 		55EA2C592CB90E80004315B3 /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C582CB90E80004315B3 /* CoreML.framework */; };
 		55EA2C5B2CB90E85004315B3 /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */; };
-		8CDA5FC02D68AB25005C1B75 /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8CDA5FBE2D68AB0F005C1B75 /* Metal.framework */; };
-		8CDA5FC32D68AB25005C1B75 /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8CDA5FC12D68AB25005C1B75 /* MetalPerformanceShaders.framework */; };
-		8CDA5FC42D68AB25005C1B75 /* MetalPerformanceShadersGraph.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8CDA5FC22D68AB25005C1B75 /* MetalPerformanceShadersGraph.framework */; };
 		A851C4072CF9F1B600424E93 /* Utils.hpp in Headers */ = {isa = PBXBuildFile; fileRef = A851C4042CF9F1B600424E93 /* Utils.hpp */; };
 /* End PBXBuildFile section */
 
@@ -70,39 +43,11 @@
 		5576B49E2CEF76CC005027B7 /* re2.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = re2.xcframework; path = frameworks/re2.xcframework; sourceTree = "<group>"; };
 		5576B4B62CEF9705005027B7 /* ETModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ETModel.h; sourceTree = "<group>"; };
 		5576B4B82CEF970C005027B7 /* ETModel.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ETModel.mm; sourceTree = "<group>"; };
-		55A815F12D9D40680075A106 /* irunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = irunner.h; sourceTree = "<group>"; };
 		55DEEA372D05ABB4004422A3 /* InputType.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = InputType.h; sourceTree = "<group>"; };
-		55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HuggingFaceTokenizer.h; sourceTree = "<group>"; };
-		55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = HuggingFaceTokenizer.mm; sourceTree = "<group>"; };
 		55EA2C1C2CB90C22004315B3 /* ExecutorchLib.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ExecutorchLib.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-		55EA2C262CB90C7A004315B3 /* runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = runner.cpp; sourceTree = "<group>"; };
-		55EA2C272CB90C7A004315B3 /* runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = runner.h; sourceTree = "<group>"; };
-		55EA2C282CB90C7A004315B3 /* stats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stats.h; sourceTree = "<group>"; };
-		55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = "<group>"; };
-		55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = "<group>"; };
-		55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_prefiller.cpp; sourceTree = "<group>"; };
-		55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = "<group>"; };
-		55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_token_generator.h; sourceTree = "<group>"; };
-		55EA2C2E2CB90C7A004315B3 /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = "<group>"; };
-		55EA2C302CB90C7A004315B3 /* sampler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = "<group>"; };
-		55EA2C312CB90C7A004315B3 /* sampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sampler.h; sourceTree = "<group>"; };
-		55EA2C332CB90C7A004315B3 /* LLaMARunner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LLaMARunner.h; sourceTree = "<group>"; };
-		55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaMARunner.mm; sourceTree = "<group>"; };
-		55EA2C362CB90C7A004315B3 /* base64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = "<group>"; };
-		55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bpe_tokenizer.cpp; sourceTree = "<group>"; };
-		55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer.h; sourceTree = "<group>"; };
-		55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = llama_tiktoken.cpp; sourceTree = "<group>"; };
-		55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = llama_tiktoken.h; sourceTree = "<group>"; };
-		55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tiktoken.cpp; sourceTree = "<group>"; };
-		55EA2C3C2CB90C7A004315B3 /* tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = "<group>"; };
-		55EA2C3D2CB90C7A004315B3 /* tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = "<group>"; };
 		55EA2C562CB90E7D004315B3 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
 		55EA2C582CB90E80004315B3 /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; };
 		55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.tbd; path = usr/lib/libsqlite3.tbd; sourceTree = SDKROOT; };
-		8CDA5FBE2D68AB0F005C1B75 /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
-		8CDA5FC12D68AB25005C1B75 /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = SDKROOT; };
-		8CDA5FC22D68AB25005C1B75 /* MetalPerformanceShadersGraph.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShadersGraph.framework; path = System/Library/Frameworks/MetalPerformanceShadersGraph.framework; sourceTree = SDKROOT; };
-		A84198832D02DF29006D4D5E /* InputType.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = InputType.h; sourceTree = "<group>"; };
 		A851C4042CF9F1B600424E93 /* Utils.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Utils.hpp; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
@@ -154,68 +99,20 @@
 			children = (
 				55DEEA372D05ABB4004422A3 /* InputType.h */,
 				55EA2C352CB90C7A004315B3 /* Exported */,
-				55EA2C2F2CB90C7A004315B3 /* runner */,
-				55EA2C322CB90C7A004315B3 /* sampler */,
-				55EA2C3E2CB90C7A004315B3 /* tokenizer */,
 				A851C4042CF9F1B600424E93 /* Utils.hpp */,
 			);
 			path = ExecutorchLib;
 			sourceTree = "<group>";
 		};
-		55EA2C2F2CB90C7A004315B3 /* runner */ = {
-			isa = PBXGroup;
-			children = (
-				55A815F12D9D40680075A106 /* irunner.h */,
-				55EA2C262CB90C7A004315B3 /* runner.cpp */,
-				55EA2C272CB90C7A004315B3 /* runner.h */,
-				55EA2C282CB90C7A004315B3 /* stats.h */,
-				55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */,
-				55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */,
-				55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */,
-				55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */,
-				55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */,
-				55EA2C2E2CB90C7A004315B3 /* util.h */,
-			);
-			path = runner;
-			sourceTree = "<group>";
-		};
-		55EA2C322CB90C7A004315B3 /* sampler */ = {
-			isa = PBXGroup;
-			children = (
-				55EA2C302CB90C7A004315B3 /* sampler.cpp */,
-				55EA2C312CB90C7A004315B3 /* sampler.h */,
-			);
-			path = sampler;
-			sourceTree = "<group>";
-		};
 		55EA2C352CB90C7A004315B3 /* Exported */ = {
 			isa = PBXGroup;
 			children = (
-				55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */,
-				55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */,
 				5576B4B82CEF970C005027B7 /* ETModel.mm */,
 				5576B4B62CEF9705005027B7 /* ETModel.h */,
-				55EA2C332CB90C7A004315B3 /* LLaMARunner.h */,
-				55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */,
 			);
 			path = Exported;
 			sourceTree = "<group>";
 		};
-		55EA2C3E2CB90C7A004315B3 /* tokenizer */ = {
-			isa = PBXGroup;
-			children = (
-				55EA2C362CB90C7A004315B3 /* base64.h */,
-				55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */,
-				55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */,
-				55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */,
-				55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */,
-				55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */,
-				55EA2C3C2CB90C7A004315B3 /* tiktoken.h */,
-				55EA2C3D2CB90C7A004315B3 /* tokenizer.h */,
-			);
-			path = tokenizer;
-			sourceTree = "<group>";
-		};
 		55EA2C552CB90E7D004315B3 /* Frameworks */ = {
 			isa = PBXGroup;
 			children = (
@@ -245,24 +142,9 @@
 			isa = PBXHeadersBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */,
 				5576B4B72CEF9709005027B7 /* ETModel.h in Headers */,
-				55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */,
-				55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */,
 				55DEEA382D05ABBB004422A3 /* InputType.h in Headers */,
-				55EA2C412CB90C7A004315B3 /* stats.h in Headers */,
-				55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */,
-				55EA2C402CB90C7A004315B3 /* runner.h in Headers */,
-				55A815F22D9D40680075A106 /* irunner.h in Headers */,
-				55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */,
-				55EA2C492CB90C7A004315B3 /* sampler.h in Headers */,
-				55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */,
-				55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */,
-				55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */,
-				55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */,
-				55EA2C472CB90C7A004315B3 /* util.h in Headers */,
 				A851C4072CF9F1B600424E93 /* Utils.hpp in Headers */,
-				55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -273,7 +155,6 @@
 			isa = PBXNativeTarget;
 			buildConfigurationList = 55EA2C232CB90C22004315B3 /* Build configuration list for PBXNativeTarget "ExecutorchLib" */;
 			buildPhases = (
-				558699CC2D8B1D30004180E5 /* Build tokenizers cpp */,
 				55EA2C172CB90C22004315B3 /* Headers */,
 				55EA2C182CB90C22004315B3 /* Sources */,
 				55EA2C192CB90C22004315B3 /* Frameworks */,
@@ -334,43 +215,12 @@
 		};
 /* End PBXResourcesBuildPhase section */
 
-/* Begin PBXShellScriptBuildPhase section */
-		558699CC2D8B1D30004180E5 /* Build tokenizers cpp */ = {
-			isa = PBXShellScriptBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			inputFileListPaths = (
-			);
-			inputPaths = (
-			);
-			name = "Build tokenizers cpp";
-			outputFileListPaths = (
-			);
-			outputPaths = (
-				"$(DERIVED_FILE_DIR)/newOutputFile",
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-			shellPath = /bin/sh;
-			shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n    echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run `sudo /Applications/CMake.app/Contents/bin/cmake-gui --install` to install CMake commandline tools.\"\n    exit 1\nfi\n\n# Type a script or drag a script file from your workspace to insert its path.\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == \"iphoneos\" ]]; then\n  TOKENIZERS_TARGET=\"aarch64-apple-ios\"\nelif [[ \"$PLATFORM_NAME\" == \"iphonesimulator\" ]]; then\n  TOKENIZERS_TARGET=\"x86_64-apple-ios\"\nfi\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n  PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n  PLATFORM=\"MAC_ARM64\"\n  DEPLOYMENT_TARGET=\"10.15\"\nfi\n\n\ncmake_build_tokenizers() {\n    export PATH=\"$PATH:$HOME/.cargo/bin\"\n    export PATH=\"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib:$PATH\"\n    export LIBRARY_PATH=\"$LIBRARY_PATH:/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib\"\n    local src_dir=$1\n    local target=$2\n    local extra_args=(\"$@\")\n    local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n    mkdir -p \"$build_dir\" && cd \"$build_dir\"\n    \n    if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n        extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n        extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n    fi\n    \n    cmake \"$src_dir\" -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../ios/ios.toolchain.cmake\" \\\n         -DPLATFORM=\"$PLATFORM\" \\\n         -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n         -DENABLE_BITCODE=FALSE \\\n         -DENABLE_ARC=TRUE \\\n         -DENABLE_VISIBILITY=TRUE \\\n         -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n         -DMLC_ENABLE_SENTENCEPIECE_TOKENIZER=ON\n        
  \n    cmake --build . --config \"Release\" --target \"install\"\n}\n\ncmake_build_tokenizers \"$SRCROOT/../../tokenizers-cpp\" \"install\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n";
-		};
-/* End PBXShellScriptBuildPhase section */
-
 /* Begin PBXSourcesBuildPhase section */
 		55EA2C182CB90C22004315B3 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
-				55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */,
-				55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */,
-				55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */,
-				55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */,
-				55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */,
 				5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */,
-				55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */,
-				55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */,
-				55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */,
-				55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -549,7 +399,6 @@
 					"$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a",
 					"-force_load",
 					"$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a",
-					"@$(TEMP_DIR)/cmake/linker_flags",
 				);
 				"OTHER_LDFLAGS[sdk=iphonesimulator*]" = (
 					"$(inherited)",
@@ -567,7 +416,6 @@
 					"$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a",
 					"-force_load",
 					"$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a",
-					"@$(TEMP_DIR)/cmake/linker_flags",
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch;
 				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
@@ -631,7 +479,6 @@
 					"$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a",
 					"-force_load",
 					"$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a",
-					"@$(TEMP_DIR)/cmake/linker_flags",
 				);
 				"OTHER_LDFLAGS[sdk=iphonesimulator*]" = (
 					"$(inherited)",
@@ -649,7 +496,6 @@
 					"$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a",
 					"-force_load",
 					"$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a",
-					"@$(TEMP_DIR)/cmake/linker_flags",
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch;
 				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h
deleted file mode 100644
index 4332cf811d..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#import <Foundation/Foundation.h>
-
-@interface HuggingFaceTokenizer : NSObject
-
-- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath;
-- (NSArray<NSNumber *> *)encode:(NSString *)text;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds;
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
-    skipSpecialTokens:(BOOL)skipSpecialTokens;
-- (NSUInteger)getVocabSize;
-- (NSString *)idToToken:(NSInteger)tokenId;
-- (NSInteger)tokenToId:(NSString *)token;
-
-@end
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm
deleted file mode 100644
index 38d7cdfd09..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm
+++ /dev/null
@@ -1,80 +0,0 @@
-#import "HuggingFaceTokenizer.h"
-#include <fstream>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <tokenizers-cpp/tokenizers_cpp.h>
-
-std::string loadBytesFromFile(const std::string &path) {
-  std::ifstream fs(path, std::ios::in | std::ios::binary);
-  if (fs.fail()) {
-    throw std::runtime_error("Failed to open tokenizer file");
-  }
-  std::string data;
-  fs.seekg(0, std::ios::end);
-  size_t size = static_cast<size_t>(fs.tellg());
-  fs.seekg(0, std::ios::beg);
-  data.resize(size);
-  fs.read(data.data(), size);
-  return data;
-}
-
-@implementation HuggingFaceTokenizer {
-  std::unique_ptr<tokenizers::Tokenizer> _tokenizer;
-}
-
-- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath {
-  self = [super init];
-  if (self) {
-    auto blob = loadBytesFromFile([tokenizerPath UTF8String]);
-    _tokenizer = tokenizers::Tokenizer::FromBlobJSON(blob);
-  }
-  return self;
-}
-
-- (NSArray<NSNumber *> *)encode:(NSString *)text {
-  std::vector<int32_t> result = _tokenizer->Encode([text UTF8String]);
-  NSMutableArray<NSNumber *> *encodedResult =
-      [[NSMutableArray alloc] initWithCapacity:result.size()];
-  for (int32_t tokenId : result) {
-    [encodedResult addObject:@(tokenId)];
-  }
-
-  return encodedResult;
-}
-
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds {
-  return [self decode:tokenIds skipSpecialTokens:NO];
-}
-
-- (NSString *)decode:(NSArray<NSNumber *> *)tokenIds
-    skipSpecialTokens:(BOOL)skipSpecialTokens {
-  std::vector<int32_t> stdTokenIds;
-  stdTokenIds.reserve([tokenIds count]);
-  for (NSNumber *tokenId in tokenIds) {
-    stdTokenIds.push_back([tokenId intValue]);
-  }
-  std::string decodedString =
-      _tokenizer->Decode(stdTokenIds, skipSpecialTokens);
-  return [NSString stringWithUTF8String:decodedString.c_str()];
-}
-
-- (NSUInteger)getVocabSize {
-  return (NSUInteger)_tokenizer->GetVocabSize();
-}
-
-- (NSString *)idToToken:(NSInteger)tokenId {
-  std::string token = _tokenizer->IdToToken(static_cast<int32_t>(tokenId));
-  return [NSString stringWithUTF8String:token.c_str()];
-}
-
-- (NSInteger)tokenToId:(NSString *)token {
-  std::string stdToken = [token UTF8String];
-  return (NSInteger)_tokenizer->TokenToId(stdToken);
-}
-
-- (void)dealloc {
-  _tokenizer.reset();
-}
-
-@end
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h
deleted file mode 100644
index d8638cfa6a..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#import <UIKit/UIKit.h>
-
-NS_ASSUME_NONNULL_BEGIN
-
-FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-
-NS_SWIFT_NAME(Runner)
-@interface LLaMARunner : NSObject
-
-- (instancetype)initWithModelPath:(NSString *)filePath
-                    tokenizerPath:(NSString *)tokenizerPath;
-- (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError **)error;
-- (BOOL)generate:(NSString *)prompt
-    withTokenCallback:(nullable void (^)(NSString *))callback
-                error:(NSError **)error;
-- (void)stop;
-
-+ (instancetype)new NS_UNAVAILABLE;
-- (instancetype)init NS_UNAVAILABLE;
-
-@end
-
-NS_ASSUME_NONNULL_END
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm
deleted file mode 100644
index b50bfb7b01..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#import "LLaMARunner.h"
-
-#import "runner.h"
-#import <ExecuTorch/ExecuTorchLog.h>
-
-using namespace ::torch::executor;
-
-NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
-
-@interface LLaMARunner () <ExecuTorchLogSink>
-@end
-
-@implementation LLaMARunner {
-  std::unique_ptr<example::Runner> _runner;
-}
-
-- (instancetype)initWithModelPath:(NSString *)modelPath
-                    tokenizerPath:(NSString *)tokenizerPath {
-  self = [super init];
-  if (self) {
-    [ExecuTorchLog.sharedLog addSink:self];
-    _runner = std::make_unique<example::Runner>(modelPath.UTF8String,
-                                                tokenizerPath.UTF8String);
-  }
-  return self;
-}
-
-- (void)dealloc {
-  [ExecuTorchLog.sharedLog removeSink:self];
-}
-
-- (BOOL)isLoaded {
-  return _runner->is_loaded();
-}
-
-- (BOOL)loadWithError:(NSError **)error {
-  const auto status = _runner->load();
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
-}
-
-- (BOOL)generate:(NSString *)prompt
-    withTokenCallback:(nullable void (^)(NSString *))callback
-                error:(NSError **)error {
-  const auto status = _runner->generate(
-      prompt.UTF8String,
-      [callback](const std::string &token) { callback(@(token.c_str())); });
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-      return NO;
-    }
-  }
-  return YES;
-}
-
-- (void)stop {
-  _runner->stop();
-}
-
-#pragma mark - ExecuTorchLogSink
-
-- (void)logWithLevel:(ExecuTorchLogLevel)level
-           timestamp:(NSTimeInterval)timestamp
-            filename:(NSString *)filename
-                line:(NSUInteger)line
-             message:(NSString *)message {
-  NSUInteger totalSeconds = (NSUInteger)timestamp;
-  NSUInteger hours = (totalSeconds / 3600) % 24;
-  NSUInteger minutes = (totalSeconds / 60) % 60;
-  NSUInteger seconds = totalSeconds % 60;
-  NSUInteger microseconds = (timestamp - totalSeconds) * 1000000;
-  NSLog(@"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s", (char)level, hours,
-        minutes, seconds, microseconds, filename.UTF8String, line,
-        message.UTF8String);
-}
-
-@end
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h
deleted file mode 100644
index 722fe3900e..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-// @lint-ignore-every LICENSELINT
-/**************************************************************************
-   Copyright (c) 2023 sewenew
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
- *************************************************************************/
-
-#pragma once
-
-#include <cassert>
-#include <executorch/runtime/core/error.h>
-#include <executorch/runtime/core/result.h>
-#include <executorch/runtime/platform/assert.h>
-#include <string>
-#include <string_view>
-
-namespace executorch {
-namespace extension {
-namespace llm {
-using Error = executorch::runtime::Error;
-template <typename T> using Result = executorch::runtime::Result<T>;
-
-namespace base64 {
-
-Result<std::string> decode(const std::string_view &input);
-
-namespace detail {
-
-constexpr uint32_t DECODE_TABLE[] = {
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62,  255,
-    255, 255, 63,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  255, 255,
-    255, 255, 255, 255, 255, 0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
-    10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
-    25,  255, 255, 255, 255, 255, 255, 26,  27,  28,  29,  30,  31,  32,  33,
-    34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
-    49,  50,  51,  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-    255};
-
-inline Error validate(uint32_t v) {
-  ET_CHECK_OR_RETURN_ERROR(v != 255, InvalidArgument, "invalid char");
-  return Error::Ok;
-}
-
-inline Error decode(const std::string_view &input, std::string &output) {
-  ET_CHECK_OR_RETURN_ERROR(input.size() == 4, InvalidArgument,
-                           "input length must be 4, got %zu", input.size());
-
-  uint32_t val = 0;
-
-  uint8_t c = input[0];
-  auto v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = v;
-
-  c = input[1];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  c = input[2];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  c = input[3];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  output.push_back(static_cast<char>((val >> 16) & 0xFF));
-  output.push_back(static_cast<char>((val >> 8) & 0xFF));
-  output.push_back(static_cast<char>(val & 0xFF));
-  return Error::Ok;
-}
-
-inline Error decode_1_padding(const std::string_view &input,
-                              std::string &output) {
-  ET_CHECK_OR_RETURN_ERROR(input.size() == 3, InvalidArgument,
-                           "input length must be 3, got %zu", input.size());
-
-  uint32_t val = 0;
-
-  uint8_t c = input[0];
-  auto v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = v;
-
-  c = input[1];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  c = input[2];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  output.push_back(static_cast<char>((val >> 10) & 0xFF));
-  output.push_back(static_cast<char>((val >> 2) & 0xFF));
-  return Error::Ok;
-}
-
-inline Error decode_2_padding(const std::string_view &input,
-                              std::string &output) {
-  ET_CHECK_OR_RETURN_ERROR(input.size() == 2, InvalidArgument,
-                           "input length must be 2, got %zu", input.size());
-
-  uint32_t val = 0;
-
-  uint8_t c = input[0];
-  auto v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = v;
-
-  c = input[1];
-  v = DECODE_TABLE[c];
-  ET_CHECK_OK_OR_RETURN_ERROR(validate(v));
-  val = (val << 6) | v;
-
-  output.push_back(static_cast<char>((val >> 4) & 0xFF));
-  return Error::Ok;
-}
-
-} // namespace detail
-
-inline Result<std::string> decode(const std::string_view &input) {
-  ET_CHECK_OR_RETURN_ERROR(!input.empty(), InvalidArgument, "empty input");
-
-  // Faster than `input.size() % 4`.
-  ET_CHECK_OR_RETURN_ERROR(
-      (input.size() & 3) == 0 && input.size() >= 4, InvalidArgument,
-      "input length must be larger than 4 and is multiple of 4, got %zu",
-      input.size());
-
-  std::string output;
-  output.reserve(input.size() / 4 * 3);
-  auto idx = 0U;
-  for (; idx < input.size() - 4; idx += 4) {
-    ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output));
-  }
-
-  // Last 4 bytes. Might contain paddings.
-  if (input[idx + 3] == '=') {
-    if (input[idx + 2] == '=') {
-      // Tow paddings.
-      ET_CHECK_OK_OR_RETURN_ERROR(
-          detail::decode_2_padding(input.substr(idx, 2), output));
-    } else {
-      // One padding.
-      ET_CHECK_OK_OR_RETURN_ERROR(
-          detail::decode_1_padding(input.substr(idx, 3), output));
-    }
-  } else {
-    // No padding.
-    ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output));
-  }
-
-  return output;
-}
-
-} // namespace base64
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
-
-namespace torch {
-namespace executor {
-namespace base64 {
-// TODO(T197294990): Remove these deprecated aliases once all users have moved
-// to the new `::executorch` namespaces.
-using ::executorch::extension::llm::base64::decode;
-} // namespace base64
-} // namespace executor
-} // namespace torch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp
deleted file mode 100644
index aa0a6d1baa..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include "bpe_tokenizer.h"
-
-#include <cstring>
-
-using ::executorch::runtime::Error;
-using ::executorch::runtime::Result;
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-static int compare_tokens(const void *a, const void *b) {
-  if (((TokenIndex *)a)->str == nullptr) {
-    return -1;
-  }
-  if (((TokenIndex *)b)->str == nullptr) {
-    return 1;
-  }
-  return strcmp(((TokenIndex *)a)->str, ((TokenIndex *)b)->str);
-}
-
-BPETokenizer::BPETokenizer() : Tokenizer() {
-  for (int i = 0; i < 256; i++) {
-    byte_pieces_[i * 2] = (unsigned char)i;
-    byte_pieces_[i * 2 + 1] = '\0';
-  }
-}
-
-/**
- * @brief Load the tokenizer from a file. The tokenizer file contains the
- * vocabulary and scores. The format is: the first integer is the maximum
- * token length, followed by a list of (word_len, word) pairs. Here we
- * are reading all the vocabulary into memory and keep it sorted for fast
- * lookup.
- *
- * @param tokenizer_path The path to the tokenizer file.
- * @return Error
- */
-Error BPETokenizer::load(const std::string &tokenizer_path) {
-  if (initialized_) {
-    ET_LOG(Info, "Tokenizer already initialized");
-    return Error::Ok;
-  }
-  // read in the file
-  FILE *file = fopen(tokenizer_path.c_str(), "rb");
-  if (!file) {
-    ET_LOG(Error, "couldn't load %s", tokenizer_path.c_str());
-    return Error::InvalidArgument;
-  }
-  int32_t metadata[4];
-  for (int i = 0; i < 4; i++) {
-    if (fread(metadata + i, sizeof(int32_t), 1, file) != 1) {
-      ET_LOG(Error,
-             "Failed to read the metadata at position %d, the tokenizer file "
-             "is not valid!",
-             i);
-      return Error::InvalidArgument;
-    }
-  }
-
-  // now we have two vocab_sizes one from the model and another from the
-  // tokenizer file.
-  int32_t tokenizer_vocab_size = metadata[0];
-  vocab_size_ = tokenizer_vocab_size;
-  bos_tok_ = metadata[1];
-  eos_tok_ = metadata[2];
-  max_token_length_ = metadata[3];
-
-  // allocate space for the vocabulary
-  vocab_ = std::make_unique<char *[]>(vocab_size_);
-  vocab_scores_ = std::make_unique<float[]>(vocab_size_);
-  sorted_vocab_ = std::make_unique<TokenIndex[]>(vocab_size_);
-
-  // read in the vocabulary
-  for (int i = 0; i < vocab_size_; i++) {
-    if (fread(vocab_scores_.get() + i, sizeof(float), 1, file) != 1) {
-      // This is allowed, we just pad the rest of the vocab with <pad> strings
-      std::string padding = "<pad>";
-      vocab_[i] = new char[padding.length() + 1];
-      strcpy(vocab_[i], padding.c_str());
-      vocab_[i][padding.length()] = '\0';
-      continue;
-    }
-    int32_t len;
-    if (fread(&len, sizeof(int32_t), 1, file) != 1) {
-      ET_LOG(Error, "Failed to read the length of the word at index %d", i);
-      return Error::InvalidArgument;
-    }
-    vocab_[i] = new char[len + 1];
-    if (fread(vocab_[i], len, 1, file) != 1) {
-      ET_LOG(Error, "Failed to read the word, total length %d, index %d\n", len,
-             i);
-      return Error::InvalidArgument;
-    }
-    vocab_[i][len] = '\0'; // add the string terminating token
-  }
-  fclose(file);
-
-  for (int32_t i = 0; i < vocab_size_; i++) {
-    sorted_vocab_[i].str = vocab_[i];
-    sorted_vocab_[i].id = i;
-  }
-  qsort(sorted_vocab_.get(), vocab_size_, sizeof(TokenIndex), compare_tokens);
-
-  initialized_ = true;
-  return Error::Ok;
-}
-
-BPETokenizer::~BPETokenizer() {
-  for (int i = 0; i < vocab_size_; i++) {
-    delete[] vocab_[i];
-  }
-}
-
-/**
- * @brief Decode a token into string.
- *
- * @param prev_token The previous token.
- * @param token The current token.
- * @return Result<std::string> A pointer to the string representation of the
- * token.
- */
-Result<std::string> BPETokenizer::decode(uint64_t prev_token,
-                                         uint64_t token) const {
-  ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(token));
-  const char *piece = vocab_[token];
-  // following BOS token, sentencepiece decoder strips any leading
-  // whitespace
-  if (prev_token == bos_tok_ && piece[0] == ' ') {
-    piece++;
-  }
-  // careful, some tokens designate raw bytes, and look like e.g. '<0x01>'
-  // parse this and convert and return the actual byte
-  unsigned char byte_val;
-  if (sscanf(piece, "<0x%02hhX>", &byte_val) == 1) {
-    piece = (char *)byte_pieces_ + byte_val * 2;
-  }
-  std::string res(piece);
-  return res;
-}
-
-static int32_t str_lookup(const char *str, TokenIndex *sorted_vocab,
-                          int32_t vocab_size) {
-  // efficiently find the perfect match for str in vocab, return its index or -1
-  // if not found
-  TokenIndex tok = {.str = str}; // acts as the key to search for
-  TokenIndex *res = (TokenIndex *)bsearch(&tok, sorted_vocab, vocab_size,
-                                          sizeof(TokenIndex), compare_tokens);
-  return res != nullptr ? res->id : -1;
-}
-
-/**
- * @brief Encode a string into a sequence of tokens.
- *
- * @param text The string to be encoded.
- * @param bos The number of BOS to prepend to the token list.
- * @param eos The number of EOS to append to the token list.
- * @param tokens The output tokens.
- * @param n_tokens The number of tokens.
- * @return Result<std::vector<uint64_t>>
- */
-Result<std::vector<uint64_t>>
-BPETokenizer::encode(const std::string &text, int8_t bos, int8_t eos) const {
-  if (!initialized_) {
-    ET_LOG(Error, "Tokenizer not initialized");
-    return Error::NotSupported;
-  }
-  // encode the string text (input) into an upper-bound preallocated tokens[]
-  // array bos != 0 means prepend the BOS token (=1), eos != 0 means append the
-  // EOS token (=2)
-  if (text.empty()) {
-    ET_LOG(Error, "cannot encode empty text");
-    return Error::InvalidArgument;
-  }
-
-  // create a temporary buffer that will store merge candidates of always two
-  // consecutive tokens *2 for concat, +1 for null terminator +2 for UTF8 (in
-  // case max_token_length is 1)
-  char *str_buffer = new char[max_token_length_ * 2 + 1 + 2];
-  size_t str_len = 0;
-
-  // start at 0 tokens
-  std::vector<uint64_t> tokens;
-
-  // add optional BOS token, if desired
-  if (bos >= 0) {
-    while (bos--) {
-      tokens.push_back(bos_tok_);
-    }
-  } else {
-    ET_LOG(Error, "bos %d should be >= 0", bos);
-    return Error::InvalidArgument;
-  }
-
-  // add_dummy_prefix is true by default
-  // so prepend a dummy prefix token to the input string, but only if text != ""
-  // TODO: pretty sure this isn't correct in the general case but I don't have
-  // the energy to read more of the sentencepiece code to figure out what it's
-  // doing
-  const char *space = " ";
-  if (text[0] != '\0') {
-    int dummy_prefix = str_lookup(space, sorted_vocab_.get(), vocab_size_);
-    tokens.push_back(dummy_prefix);
-  }
-
-  // Okay UTF-8 time. This will get messy. Here is the reference from Wikipedia:
-  // Code point ↔ UTF-8 conversion
-  // First code point  Last code point  Byte 1  Byte 2  Byte 3  Byte 4
-  // U+0000  U+007F      0xxxxxxx
-  // U+0080  U+07FF      110xxxxx  10xxxxxx
-  // U+0800  U+FFFF      1110xxxx  10xxxxxx  10xxxxxx
-  // U+10000  U+10FFFF    11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
-
-  // process the raw (UTF-8) byte sequence of the input string
-  for (const char *c = text.c_str(); *c != '\0'; c++) {
-    // reset buffer if the current byte is ASCII or a leading byte
-    // 0xC0 is 11000000, so (*c & 0xC0) keeps the first 2 bits and zeros the
-    // rest 0x80 is 10000000 in UTF-8, all continuation bytes start with "10" in
-    // first two bits so in English this is: "if this byte is not a continuation
-    // byte"
-    if ((*c & 0xC0) != 0x80) {
-      // this byte must be either a leading byte (11...) or an ASCII char
-      // (0x...)
-      // => reset our location, as we're starting a new UTF-8 codepoint
-      str_len = 0;
-    }
-
-    // append the current byte to the buffer
-    str_buffer[str_len++] =
-        *c; // ++ is post-increment, incremented after this line
-    str_buffer[str_len] = '\0';
-
-    // while the next character is a continuation byte, continue appending
-    // but if there are too many of them, just stop to avoid overruning
-    // str_buffer size.
-    if ((*(c + 1) & 0xC0) == 0x80 && str_len < 4) {
-      continue;
-    }
-
-    // ok c+1 is not a continuation byte, so we've read in a full codepoint
-    int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_);
-    if (id != -1) {
-      // we found this codepoint in vocab, add it as a token
-      tokens.push_back(id);
-    } else {
-      // byte_fallback encoding: just encode each byte as a token
-      // +3 is here because the first 3 vocab elements are <unk>, <s>, </s>
-      // so the individual bytes only start at index 3
-      for (int i = 0; i < str_len; i++) {
-        tokens.push_back((unsigned char)str_buffer[i] + 3);
-      }
-    }
-    str_len = 0; // protect against a sequence of stray UTF8 continuation bytes
-  }
-
-  // merge the best consecutive pair each iteration, according the scores in
-  // vocab_scores
-  while (1) {
-    float best_score = -1e10;
-    int best_id = -1;
-    int best_idx = -1;
-
-    for (int i = 0; i < tokens.size() - 1; i++) {
-      // check if we can merge the pair (tokens[i], tokens[i+1])
-      snprintf(str_buffer, max_token_length_ * 2 + 3, "%s%s", vocab_[tokens[i]],
-               vocab_[tokens[i + 1]]);
-      int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_);
-      if (id != -1 && vocab_scores_[id] > best_score) {
-        // this merge pair exists in vocab! record its score and position
-        best_score = vocab_scores_[id];
-        best_id = id;
-        best_idx = i;
-      }
-    }
-
-    if (best_idx == -1) {
-      break; // we couldn't find any more pairs to merge, so we're done
-    }
-
-    // merge the consecutive pair (best_idx, best_idx+1) into new token best_id
-    tokens[best_idx] = best_id;
-    // delete token at position best_idx+1, shift the entire sequence back 1
-    for (int i = best_idx + 1; i < tokens.size() - 1; i++) {
-      tokens[i] = tokens[i + 1];
-    }
-    tokens.pop_back(); // token length decreased
-  }
-
-  // add optional EOS (=2) token, if desired
-  if (eos >= 0) {
-    while (eos--) {
-      tokens.push_back(eos_tok_);
-    }
-  } else {
-    ET_LOG(Error, "eos %d should be >= 0", eos);
-    return Error::InvalidArgument;
-  }
-
-  delete[] str_buffer;
-  return Result(tokens);
-}
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h
deleted file mode 100644
index b619905793..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include "tokenizer.h"
-#include <memory>
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-struct TokenIndex {
-  const char *str;
-  int32_t id;
-};
-
-// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
-// won't work with this class, it needs to go through tokenizer.py first.
-class BPETokenizer : public Tokenizer {
-public:
-  explicit BPETokenizer();
-  ~BPETokenizer() override;
-
-  ::executorch::runtime::Error load(const std::string &tokenizer_path) override;
-
-  ::executorch::runtime::Result<std::vector<uint64_t>>
-  encode(const std::string &input, int8_t bos, int8_t eos) const override;
-
-  ::executorch::runtime::Result<std::string>
-  decode(uint64_t prev_token, uint64_t token) const override;
-
-private:
-  std::unique_ptr<char *[]> vocab_ = nullptr;
-  std::unique_ptr<float[]> vocab_scores_ = nullptr;
-  std::unique_ptr<TokenIndex[]> sorted_vocab_ = nullptr;
-  unsigned int max_token_length_ = 0;
-  unsigned char byte_pieces_[512]; // stores all single-byte strings
-};
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
-
-namespace torch {
-namespace executor {
-// TODO(T197294990): Remove these deprecated aliases once all users have moved
-// to the new `::executorch` namespaces.
-using ::executorch::extension::llm::BPETokenizer;
-using ::executorch::extension::llm::TokenIndex;
-} // namespace executor
-} // namespace torch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp
deleted file mode 100644
index 8bc7ef4879..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include "llama_tiktoken.h"
-
-namespace example {
-
-using ::executorch::extension::llm::Tiktoken;
-
-namespace {
-static constexpr int32_t kSpecialTokensSize = 256;
-static constexpr size_t kBOSTokenIndex = 0;
-static constexpr size_t kEOSTokenIndex = 1;
-
-static inline std::unique_ptr<std::vector<std::string>>
-_get_default_special_tokens() {
-  auto special_tokens =
-      std::make_unique<std::vector<std::string>>(std::vector<std::string>{
-          "<|begin_of_text|>", "<|end_of_text|>",
-          "<|reserved_special_token_0|>", "<|reserved_special_token_1|>",
-          "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>",
-          "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"});
-  // pad the rest of the special tokens with reserved tokens
-  ssize_t reserved_special_token_num = 2;
-  while (special_tokens->size() < kSpecialTokensSize) {
-    special_tokens->emplace_back("<|reserved_special_token_" +
-                                 std::to_string(reserved_special_token_num++) +
-                                 "|>");
-  }
-  return special_tokens;
-}
-
-static inline std::unique_ptr<std::vector<std::string>>
-_get_multimodal_special_tokens() {
-  auto special_tokens =
-      std::make_unique<std::vector<std::string>>(std::vector<std::string>{
-          "<|begin_of_text|>", "<|end_of_text|>",
-          "<|reserved_special_token_0|>", "<|reserved_special_token_1|>",
-          "<|reserved_special_token_2|>", "<|reserved_special_token_3|>",
-          "<|start_header_id|>", "<|end_header_id|>", "<|eom_id|>",
-          "<|eot_id|>", "<|image|>"});
-
-  // pad the rest of the special tokens with reserved tokens except the last
-  // one
-  ssize_t reserved_special_token_num = 4;
-  while (special_tokens->size() < kSpecialTokensSize - 1) {
-    special_tokens->emplace_back("<|reserved_special_token_" +
-                                 std::to_string(reserved_special_token_num++) +
-                                 "|>");
-  }
-
-  special_tokens->emplace_back("<|python_tag|>");
-
-  return special_tokens;
-}
-
-std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
-  switch (version) {
-  case Version::Multimodal:
-    return _get_multimodal_special_tokens();
-  default:
-    return _get_default_special_tokens();
-  }
-}
-
-} // namespace
-
-std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
-  return std::make_unique<Tiktoken>(_get_special_tokens(version),
-                                    kBOSTokenIndex, kEOSTokenIndex);
-}
-
-} // namespace example
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h
deleted file mode 100644
index 10e106f116..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include "tiktoken.h"
-
-namespace example {
-
-enum class Version {
-  Default,
-  Multimodal,
-};
-
-std::unique_ptr<::executorch::extension::llm::Tiktoken>
-get_tiktoken_for_llama(Version version = Version::Default);
-
-} // namespace example
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp
deleted file mode 100644
index aad4de6f75..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-// Adopted from https://github.com/sewenew/tokenizer
-
-// @lint-ignore-every LICENSELINT
-/**************************************************************************
-   Copyright (c) 2023 sewenew
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
- *************************************************************************/
-
-#include "tiktoken.h"
-#include "base64.h"
-#include <executorch/runtime/core/result.h>
-#include <fstream>
-#include <limits>
-
-using ::executorch::runtime::Error;
-using ::executorch::runtime::Result;
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-// ------------------------------Util start------------------------------------
-
-static uint64_t _max_size() { return std::numeric_limits<uint64_t>::max(); }
-
-static Re2UPtr _create_regex(const std::string &pattern) {
-  assert(!pattern.empty());
-
-  return std::make_unique<re2::RE2>("(" + pattern + ")");
-}
-
-static Re2UPtr _build_special_token_regex(const Encoder &special_encoder) {
-  std::string special_pattern;
-  for (const auto &ele : special_encoder) {
-    if (!special_pattern.empty()) {
-      special_pattern += "|";
-    }
-    special_pattern += re2::RE2::QuoteMeta(ele.first);
-  }
-
-  if (special_pattern.empty()) {
-    return nullptr;
-  }
-
-  return _create_regex(special_pattern);
-}
-
-static Result<std::pair<std::string, uint64_t>>
-_parse(const std::string &line) {
-  // Tiktoken format
-  // https://github.com/openai/tiktoken/blob/main/tiktoken/load.py#L140 <base64
-  // encoded token str> <rank>
-  auto pos = line.find(" ");
-  ET_CHECK_OR_RETURN_ERROR(pos != std::string::npos, InvalidArgument,
-                           "invalid tiktoken line: %s", line.c_str());
-
-  auto token = ET_UNWRAP(base64::decode({line.data(), pos}));
-  uint64_t rank = 0;
-  try {
-    rank = std::stoul(line.substr(pos + 1));
-  } catch (const std::exception &) {
-    ET_CHECK_OR_RETURN_ERROR(false, InvalidArgument, "invalid encoder rank: %s",
-                             line.c_str());
-  }
-
-  return std::pair{std::move(token), rank};
-}
-
-static Result<Encoder> _load_encoder(const std::string &path) {
-  std::ifstream file(path);
-  ET_CHECK_OR_RETURN_ERROR(file, InvalidArgument,
-                           "failed to open encoder file: %s", path.c_str());
-
-  Encoder encoder;
-  std::string line;
-  while (std::getline(file, line)) {
-    auto [token, rank] = ET_UNWRAP(_parse(line));
-
-    ET_CHECK_OR_RETURN_ERROR(encoder.emplace(std::move(token), rank).second,
-                             InvalidArgument, "duplicate item: %s",
-                             line.c_str());
-  }
-
-  return encoder;
-}
-
-static Result<Decoder> _build_decoder(const Encoder &encoder) {
-  Decoder decoder;
-  for (const auto &[k, v] : encoder) {
-    decoder.emplace(v, k);
-  }
-
-  ET_CHECK_OR_RETURN_ERROR(encoder.size() == decoder.size(), InvalidArgument,
-                           "duplicate items in encoder");
-
-  return decoder;
-}
-
-static std::vector<uint64_t>
-_byte_pair_merge(const std::string &piece,
-                 const std::unordered_map<std::string, uint64_t> &ranks,
-                 std::function<uint64_t(uint64_t, uint64_t)> func) {
-  // This is a vector of (start, rank).
-  // The rank is of the byte pair starting at position start.
-  // The rank of the last item in the vector is not a valid value.
-  std::vector<std::pair<uint64_t, uint64_t>> parts;
-  parts.reserve(piece.size() + 1);
-  for (auto idx = 0U; idx < piece.size() + 1; ++idx) {
-    parts.emplace_back(idx, _max_size());
-  }
-
-  auto get_rank =
-      [&piece, &ranks](const std::vector<std::pair<uint64_t, uint64_t>> &parts,
-                       uint64_t start_idx,
-                       uint64_t skip) -> std::optional<uint64_t> {
-    if (start_idx + skip + 2 < parts.size()) {
-      auto s = parts[start_idx].first;
-      auto e = parts[start_idx + skip + 2].first;
-      auto key = piece.substr(s, e - s);
-      auto iter = ranks.find(key);
-      if (iter != ranks.end()) {
-        return iter->second;
-      }
-    }
-    return std::nullopt;
-  };
-
-  // We look up the ranks once in the beginning and iteratively update
-  // them during each merge, which reduces the number of rank lookups.
-  for (auto i = 0U; i < parts.size() - 2; ++i) {
-    auto rank = get_rank(parts, i, 0);
-    if (rank) {
-      // usize::MAX is a sentinel value and cannot be a valid rank
-      ET_CHECK_MSG(*rank != _max_size(), "rank is too large");
-      parts[i].second = *rank;
-    }
-  }
-
-  // If you have n parts and m merges, this does O(mn) work.
-  // We could do something with a heap and do O(m log n) work.
-  // It is important to consider that n is often small (<100), and as such
-  // the cache-locality benefits outweigh the algorithmic complexity downsides
-  // of the `parts` vector data structure above.
-
-  // Note that we hash bytes, not token pairs. As long as we train BPE the way
-  // we currently do, this is equivalent. An easy way to break this would be
-  // to decouple merge priority from token index or to prevent specific token
-  // merges.
-  while (true) {
-    if (parts.size() == 1) {
-      break;
-    }
-
-    // usize::MAX is a sentinel rank value allowing us to
-    // take the min more quickly
-    auto min_rank = std::make_pair<uint64_t, uint64_t>(_max_size(), 0);
-    for (auto i = 0U; i < parts.size() - 1; ++i) {
-      auto rank = parts[i].second;
-      if (rank < min_rank.first) {
-        min_rank.first = rank;
-        min_rank.second = i;
-      }
-    }
-
-    if (min_rank.first != _max_size()) {
-      auto i = min_rank.second;
-
-      // NOTE: We are about to remove parts[i + 1]. We do not do it
-      // yet because there are cache-locality benefits to updating
-      // parts[i] and parts[i-1] before removing, which could thrash
-      // the cache. Thus, we update the rank calculation by skipping over
-      // parts[i + 1], by invoking `get_rank!` with `skip = 1`.
-      auto rank = get_rank(parts, i, 1);
-      if (rank) {
-        parts[i].second = *rank;
-      } else {
-        parts[i].second = _max_size();
-      }
-      if (i > 0) {
-        rank = get_rank(parts, i - 1, 1);
-        if (rank) {
-          parts[i - 1].second = *rank;
-        } else {
-          parts[i - 1].second = _max_size();
-        }
-      }
-
-      parts.erase(parts.begin() + (i + 1));
-    } else {
-      break;
-    }
-  }
-  std::vector<uint64_t> out;
-  out.reserve(parts.size() - 1);
-  for (auto i = 0U; i < parts.size() - 1; ++i) {
-    auto s = parts[i].first;
-    auto e = parts[i + 1].first;
-    out.push_back(func(s, e));
-  }
-  return out;
-}
-
-static std::vector<uint64_t> _byte_pair_encode(const std::string &piece,
-                                               const Encoder &encoder) {
-  if (piece.size() == 1) {
-    auto iter = encoder.find(piece);
-    if (iter != encoder.end()) {
-      return std::vector<uint64_t>({iter->second});
-    } else {
-      // TODO: is it possible?
-      return {};
-    }
-  }
-
-  return _byte_pair_merge(piece, encoder,
-                          [&piece, &encoder](uint64_t start, uint64_t stop) {
-                            std::string key = piece.substr(start, stop - start);
-                            auto iter = encoder.find(key);
-                            if (iter != encoder.end()) {
-                              return iter->second;
-                            } else {
-                              // TODO: what if key does not exist? Should we
-                              // return `unknown`? assert(false); // ??
-                              return uint64_t(0);
-                            }
-                          });
-}
-// ------------------------------Util end------------------------------------
-// -------------------------private method start-------------------------------
-
-template <typename T>
-std::pair<std::optional<std::string>, re2::StringPiece>
-Tiktoken::_split_with_allowed_special_token(re2::StringPiece &input,
-                                            const T &allowed_special) const {
-  if (!_special_token_regex) {
-    return std::make_pair(std::nullopt, input);
-  }
-
-#if __cplusplus >= 202002L
-  auto start = input.begin();
-#else
-  const char *start = input.data();
-#endif
-  std::string special;
-  while (true) {
-    if (!re2::RE2::FindAndConsume(&input, *_special_token_regex, &special)) {
-      // No special token.
-      break;
-    }
-
-    if (allowed_special.count(special) == 1) {
-      // Found an allowed special token, split the text with it.
-#if __cplusplus >= 202002L
-      return std::make_pair(
-          special,
-          re2::StringPiece(start, input.begin() - start - special.size()));
-#else
-      return std::make_pair(
-          special,
-          re2::StringPiece(start, (input.data() - start) - special.size()));
-#endif
-    } // else try to find the next special token
-  }
-
-  return std::make_pair(std::nullopt, input);
-}
-
-void Tiktoken::_encode(re2::StringPiece &input, std::vector<uint64_t> &ret,
-                       uint64_t &last_piece_token_len) const {
-  std::string piece;
-  assert(_regex);
-  while (re2::RE2::FindAndConsume(&input, *_regex, &piece)) {
-    auto iter = _encoder.find(piece);
-    if (iter != _encoder.end()) {
-      last_piece_token_len = 1;
-      ret.push_back(iter->second);
-      continue;
-    }
-    auto tokens = _byte_pair_encode(piece, _encoder);
-    last_piece_token_len = tokens.size();
-    ret.insert(ret.end(), tokens.begin(), tokens.end());
-  }
-}
-
-template <typename T>
-std::pair<std::vector<uint64_t>, uint64_t>
-Tiktoken::_encode_with_special_token(const std::string &text,
-                                     const T &allowed_special) const {
-  std::vector<uint64_t> tokens;
-  uint64_t last_piece_token_len = 0;
-  re2::StringPiece input(text);
-  while (true) {
-    auto [special, sub_input] =
-        _split_with_allowed_special_token(input, allowed_special);
-
-    _encode(sub_input, tokens, last_piece_token_len);
-
-    if (special) {
-      uint64_t token = 0;
-      try {
-        token = _special_token_encoder.at(*special);
-      } catch (const std::out_of_range &) {
-        // Should never go here, since special pattern includes all special
-        // chars.
-        ET_CHECK_MSG(false, "unknown special token: %s", special->c_str());
-      }
-
-      tokens.push_back(token);
-      last_piece_token_len = 0;
-    } else {
-      break;
-    }
-  }
-
-  // last_piece_token_len is how many tokens came from the last regex split.
-  // This is used for determining unstable tokens, since you can't merge
-  // across (stable) regex splits
-  return std::make_pair(tokens, last_piece_token_len);
-}
-
-Encoder Tiktoken::_build_special_token_encoder(ssize_t num_base_tokens) const {
-  Encoder special_token_encoder;
-  for (ssize_t i = 0; i < _special_tokens->size(); ++i) {
-    special_token_encoder.emplace(_special_tokens->at(i), num_base_tokens + i);
-  }
-  return special_token_encoder;
-}
-
-// -------------------------private method end-------------------------------
-// -------------------------public method start-------------------------------
-
-Tiktoken::Tiktoken(std::unique_ptr<std::vector<std::string>> special_tokens,
-                   size_t bos_token_index, size_t eos_token_index)
-    : Tokenizer(), _special_tokens(std::move(special_tokens)),
-      _bos_token_index(bos_token_index), _eos_token_index(eos_token_index) {
-  ET_CHECK_MSG(_bos_token_index < _special_tokens->size(),
-               "invalid bos_token_index %zu", _bos_token_index);
-  ET_CHECK_MSG(_eos_token_index < _special_tokens->size(),
-               "invalid eos_token_index %zu", _eos_token_index);
-}
-
-Error Tiktoken::load(const std::string &path) {
-  _encoder = ET_UNWRAP(_load_encoder(path));
-  _special_token_encoder = _build_special_token_encoder(_encoder.size());
-
-  _decoder = ET_UNWRAP(_build_decoder(_encoder));
-  _special_token_decoder = ET_UNWRAP(_build_decoder(_special_token_encoder));
-
-  _regex = _create_regex(_pattern);
-  // Warmup re2 as it is slow on the first run, void the return value as it's
-  // not needed Refer to
-  // https://github.com/google/re2/blob/6dcd83d60f7944926bfd308cc13979fc53dd69ca/re2/fuzzing/re2_fuzzer.cc#L136-L141
-  (void)_regex->ReverseProgramSize();
-
-  _special_token_regex = _build_special_token_regex(_special_token_encoder);
-  // Same as above, warm up re2
-  (void)_special_token_regex->ReverseProgramSize();
-
-  // initialize vocab_size, bos_tok, eos_tok
-  vocab_size_ = _encoder.size() + _special_token_encoder.size();
-  bos_tok_ = _special_token_encoder.at(_special_tokens->at(_bos_token_index));
-  eos_tok_ = _special_token_encoder.at(_special_tokens->at(_eos_token_index));
-
-  initialized_ = true;
-  return Error::Ok;
-}
-
-Result<std::vector<uint64_t>> Tiktoken::encode(const std::string &text,
-                                               int8_t bos, int8_t eos) const {
-  if (!initialized_) {
-    return Error::NotSupported;
-  }
-  auto res = _encode_with_special_token(text, _special_token_encoder).first;
-  for (auto i = 0; i < bos; ++i) {
-    res.insert(res.begin(), bos_tok_);
-  }
-  for (auto i = 0; i < eos; ++i) {
-    res.push_back(eos_tok_);
-  }
-  return Result<std::vector<uint64_t>>(std::move(res));
-}
-
-Result<std::string> Tiktoken::decode(uint64_t prev, uint64_t cur) const {
-  (void)prev;
-  ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(cur));
-  std::string ret;
-
-  std::string token_bytes;
-  auto iter = _decoder.find(cur);
-  if (iter != _decoder.end()) {
-    token_bytes = iter->second;
-  } else {
-    iter = _special_token_decoder.find(cur);
-    if (iter != _special_token_decoder.end()) {
-      token_bytes = iter->second;
-    } else {
-      ET_CHECK_MSG(false, "unknown token: %" PRIu64, cur);
-    }
-  }
-  ret += token_bytes;
-
-  return ret;
-}
-// -------------------------public method end-------------------------------
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h
deleted file mode 100644
index 5eed7e94c8..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include "tokenizer.h"
-#include <memory>
-#include <optional>
-#include <re2/re2.h>
-#include <unordered_map>
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-using Encoder = std::unordered_map<std::string, uint64_t>;
-using Decoder = std::unordered_map<uint64_t, std::string>;
-using Re2UPtr = std::unique_ptr<re2::RE2>;
-
-class Tiktoken : public Tokenizer {
-public:
-  /**
-   * @param[in] special_tokens List of special tokens including bos, eos;
-   * @param[in] bos_token_index Index of the bos token in special_tokens;
-   * @param[in] eos_token_index Index of the eos token in special_tokens.
-   */
-  explicit Tiktoken(std::unique_ptr<std::vector<std::string>> special_tokens,
-                    size_t bos_token_index, size_t eos_token_index);
-
-  ::executorch::runtime::Error load(const std::string &tokenizer_path) override;
-
-  ::executorch::runtime::Result<std::vector<uint64_t>>
-  encode(const std::string &input, int8_t bos, int8_t eos) const override;
-
-  ::executorch::runtime::Result<std::string>
-  decode(uint64_t prev_token, uint64_t token) const override;
-
-private:
-  template <typename T>
-  std::pair<std::optional<std::string>, re2::StringPiece>
-  _split_with_allowed_special_token(re2::StringPiece &input,
-                                    const T &allowed_special) const;
-
-  void _encode(re2::StringPiece &input, std::vector<uint64_t> &ret,
-               uint64_t &last_piece_token_len) const;
-
-  template <typename T>
-  std::pair<std::vector<uint64_t>, uint64_t>
-  _encode_with_special_token(const std::string &text,
-                             const T &allowed_special) const;
-
-  Encoder _build_special_token_encoder(ssize_t num_base_tokens) const;
-
-  std::unique_ptr<std::vector<std::string>> _special_tokens;
-  size_t _bos_token_index;
-  size_t _eos_token_index;
-  // Removed negative lookahead \s+(?!\S) since it's not supported by RE2.
-  const std::string _pattern =
-      R"((?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+)";
-  Encoder _encoder;
-  Encoder _special_token_encoder;
-  Decoder _decoder;
-  Decoder _special_token_decoder;
-
-  Re2UPtr _regex;
-  Re2UPtr _special_token_regex;
-};
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
-
-namespace torch {
-namespace executor {
-// TODO(T197294990): Remove these deprecated aliases once all users have moved
-// to the new `::executorch` namespaces.
-using ::executorch::extension::llm::Decoder;
-using ::executorch::extension::llm::Encoder;
-using ::executorch::extension::llm::Re2UPtr;
-using ::executorch::extension::llm::Tiktoken;
-} // namespace executor
-} // namespace torch
diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h
deleted file mode 100644
index 948cccc0d7..0000000000
--- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include <cinttypes>
-// patternlint-disable-next-line executorch-cpp-nostdinc
-#include <string>
-// patternlint-disable-next-line executorch-cpp-nostdinc
-#include <vector>
-
-#include <executorch/runtime/core/error.h>
-#include <executorch/runtime/core/result.h>
-
-namespace executorch {
-namespace extension {
-namespace llm {
-
-// A tokenizer interface.
-class Tokenizer {
-public:
-  explicit Tokenizer() {}
-  virtual ~Tokenizer() {}
-
-  virtual ::executorch::runtime::Error
-  load(const std::string &tokenizer_path) = 0;
-
-  virtual ::executorch::runtime::Result<std::vector<uint64_t>>
-  encode(const std::string &input, int8_t bos, int8_t eos) const = 0;
-
-  ::executorch::runtime::Error decode_verify(uint64_t token) const {
-    if (!initialized_) {
-      ET_LOG(Error, "Tokenizer not initialized");
-      return ::executorch::runtime::Error::NotSupported;
-    }
-    if (token >= vocab_size_) {
-      ET_LOG(Error, "token  %" PRIu64 " is out side of vacab range %d", token,
-             vocab_size_);
-      return ::executorch::runtime::Error::NotSupported;
-    }
-    return ::executorch::runtime::Error::Ok;
-  }
-
-  virtual ::executorch::runtime::Result<std::string>
-  decode(uint64_t prev_token, uint64_t token) const = 0;
-
-  // getters
-  int32_t vocab_size() const { return vocab_size_; }
-
-  uint64_t bos_tok() const { return bos_tok_; }
-
-  uint64_t eos_tok() const { return eos_tok_; }
-
-protected:
-  bool initialized_ = false;
-  int32_t vocab_size_ = 0;
-  uint64_t bos_tok_ = 0;
-  uint64_t eos_tok_ = 0;
-};
-
-} // namespace llm
-} // namespace extension
-} // namespace executorch
-
-namespace torch {
-namespace executor {
-// TODO(T197294990): Remove these deprecated aliases once all users have moved
-// to the new `::executorch` namespaces.
-using ::executorch::extension::llm::Tokenizer;
-} // namespace executor
-} // namespace torch