diff --git a/apps/llm/ios/Podfile.lock b/apps/llm/ios/Podfile.lock index 3c226c3076..16a673f2ad 100644 --- a/apps/llm/ios/Podfile.lock +++ b/apps/llm/ios/Podfile.lock @@ -2153,9 +2153,9 @@ PODS: - ReactCommon/turbomodule/core - Yoga - SocketRocket (0.7.1) - - sqlite3 (3.50.4): - - sqlite3/common (= 3.50.4) - - sqlite3/common (3.50.4) + - sqlite3 (3.50.1): + - sqlite3/common (= 3.50.1) + - sqlite3/common (3.50.1) - Yoga (0.0.0) DEPENDENCIES: @@ -2444,97 +2444,97 @@ EXTERNAL SOURCES: SPEC CHECKSUMS: boost: 7e761d76ca2ce687f7cc98e698152abd03a18f90 DoubleConversion: cb417026b2400c8f53ae97020b2be961b59470cb - EXConstants: 9f310f44bfedba09087042756802040e464323c0 - Expo: 4e8bda07d30b024b1732f87843a5349a3ecc1316 - ExpoAsset: 3bc9adb7dbbf27ae82c18ca97eb988a3ae7e73b1 - ExpoBrightness: c335c6ccc082d5249a4b38dba5cd9a08aa0bf62b - ExpoCalendar: f5f94ea8dcd957b1434beb4e1c0da1af063322e6 - ExpoFileSystem: c36eb8155eb2381c83dda7dc210e3eec332368b6 - ExpoFont: abbb91a911eb961652c2b0a22eef801860425ed6 - ExpoHead: af044f3e9c99e7d8d21bf653b4c2f2ef53a7f082 - ExpoKeepAwake: bf0811570c8da182bfb879169437d4de298376e7 - ExpoLinking: b85ff4eafeae6fc638c6cace60007ae521af0ef4 - ExpoModulesCore: d431ffe83c8673d02cb38425594a5f5480fd3061 + EXConstants: be238322d57d084dc055dbd5d6fe6479510504ce + Expo: 77b39f42396989cbe6fbef9f6fafc9b35186a95b + ExpoAsset: 3ea3275cca6a7793b3d36fbf1075c590f803fbcb + ExpoBrightness: 05e750736f8886dcf235212b0caf85b0f605fc88 + ExpoCalendar: 660542dc1c5ef98f46bedcc8745aa707df5d501a + ExpoFileSystem: 3a98ca2a6f13674ecfd97327d1b44a8ace444cbd + ExpoFont: 312c73403bbd4f98e1d6a5330641a56292583cd2 + ExpoHead: 5df88545652c2d3a3ea50bcd7f6be6ca935ac997 + ExpoKeepAwake: e8dedc115d9f6f24b153ccd2d1d8efcdfd68a527 + ExpoLinking: 5d151d4a497d7e375308602f0a89b4e8acf7b5f8 + ExpoModulesCore: e2e363bcdee87b46f858586d1887ebb215582001 fast_float: 06eeec4fe712a76acc9376682e4808b05ce978b6 FBLazyVector: 84b955f7b4da8b895faf5946f73748267347c975 fmt: a40bb5bd0294ea969aaaba240a927bd33d878cdd glog: 
5683914934d5b6e4240e497e0f4a3b42d1854183 hermes-engine: 314be5250afa5692b57b4dd1705959e1973a8ebe opencv-rne: 2305807573b6e29c8c87e3416ab096d09047a7a0 - RCT-Folly: e78785aa9ba2ed998ea4151e314036f6c49e6d82 + RCT-Folly: 36fe2295e44b10d831836cc0d1daec5f8abcf809 RCTDeprecation: 83ffb90c23ee5cea353bd32008a7bca100908f8c RCTRequired: eb7c0aba998009f47a540bec9e9d69a54f68136e RCTTypeSafety: 659ae318c09de0477fd27bbc9e140071c7ea5c93 React: c2d3aa44c49bb34e4dfd49d3ee92da5ebacc1c1c React-callinvoker: 1bdfb7549b5af266d85757193b5069f60659ef9d - React-Core: 10597593fdbae06f0089881e025a172e51d4a769 - React-CoreModules: 6907b255529dd46895cf687daa67b24484a612c2 - React-cxxreact: a9f5b8180d6955bc3f6a3fcd657c4d9b4d95c1f6 + React-Core: 7150cf9b6a5af063b37003062689f1691e79c020 + React-CoreModules: 15a85e6665d61678942da6ae485b351f4c699049 + React-cxxreact: 74f9de59259ac951923f5726aa14f0398f167af9 React-debug: e74e76912b91e08d580c481c34881899ccf63da9 - React-defaultsnativemodule: 11f6ee2cf69bf3af9d0f28a6253def33d21b5266 - React-domnativemodule: f940bbc4fa9e134190acbf3a4a9f95621b5a8f51 - React-Fabric: 6f5c357bf3a42ff11f8844ad3fc7a1eb04f4b9de - React-FabricComponents: 10e0c0209822ac9e69412913a8af1ca33573379b - React-FabricImage: f582e764072dfa4715ae8c42979a5bace9cbcc12 + React-defaultsnativemodule: 628285212bbd65417d40ad6a9f8781830fda6c98 + React-domnativemodule: 185d9808198405c176784aaf33403d713bd24fb7 + React-Fabric: c814804affbe1952e16149ddd20256e1bccae67e + React-FabricComponents: 81ef47d596966121784afec9924f9562a29b1691 + React-FabricImage: f14f371d678aa557101def954ac3ba27e48948ff React-featureflags: d5facceff8f8f6de430e0acecf4979a9a0839ba9 - React-featureflagsnativemodule: a7dd141f1ef4b7c1331af0035689fbc742a49ff4 - React-graphics: 36ae3407172c1c77cea29265d2b12b90aaef6aa0 - React-hermes: 9116d4e6d07abeb519a2852672de087f44da8f12 - React-idlecallbacksnativemodule: ae7f5ffc6cf2d2058b007b78248e5b08172ad5c3 - React-ImageManager: 9daee0dc99ad6a001d4b9e691fbf37107e2b7b54 - React-jserrorhandler: 
1e6211581071edaf4ecd5303147328120c73f4dc - React-jsi: 753ba30c902f3a41fa7f956aca8eea3317a44ee6 - React-jsiexecutor: 47520714aa7d9589c51c0f3713dfbfca4895d4f9 - React-jsinspector: cfd27107f6d6f1076a57d88c932401251560fe5f - React-jsinspectortracing: 76a7d791f3c0c09a0d2bf6f46dfb0e79a4fcc0ac - React-jsitooling: 995e826570dd58f802251490486ebd3244a037ab - React-jsitracing: 094ae3d8c123cea67b50211c945b7c0443d3e97b - React-logger: 8edfcedc100544791cd82692ca5a574240a16219 - React-Mapbuffer: c3f4b608e4a59dd2f6a416ef4d47a14400194468 - React-microtasksnativemodule: 054f34e9b82f02bd40f09cebd4083828b5b2beb6 - react-native-executorch: 88c3786c6346d5fbd62417b5c799e818568e6cc5 - react-native-safe-area-context: 562163222d999b79a51577eda2ea8ad2c32b4d06 - React-NativeModulesApple: 2c4377e139522c3d73f5df582e4f051a838ff25e + React-featureflagsnativemodule: 96f0ab285382d95c90f663e02526a5ceefa95a11 + React-graphics: 1a66ee0a3f093b125b853f6370296fadcaf6f233 + React-hermes: 8b86e5f54a65ecb69cdf22b3a00a11562eda82d2 + React-idlecallbacksnativemodule: 5c25ab145c602264d00cb26a397ab52e0efa031c + React-ImageManager: 15e34bd5ef1ac4a18e96660817ef70a7f99ee8c2 + React-jserrorhandler: 02cdf2cd45350108be1ffd2b164578936dbbdff7 + React-jsi: 6af1987cfbb1b6621664fdbf6c7b62bd4d38c923 + React-jsiexecutor: 51f372998e0303585cb0317232b938d694663cbd + React-jsinspector: 3539ad976d073bfaa8a7d2fa9bef35e70e55033e + React-jsinspectortracing: e8dbacaf67c201f23052ca1c2bae2f7b84dec443 + React-jsitooling: 95a34f41e3c249d42181de13b4f8d854f178ca9f + React-jsitracing: 25b029cf5cad488252d46da19dd8c4c134fd5fe4 + React-logger: 368570a253f00879a1e4fea24ed4047e72e7bbf3 + React-Mapbuffer: c04fcda1c6281fc0a6824c7dcc1633dd217ac1ec + React-microtasksnativemodule: ca2804a25fdcefffa0aa942aa23ab53b99614a34 + react-native-executorch: 66ffc33df70ec85bc591f9ee34c862835966ead3 + react-native-safe-area-context: 00d03dc688ba86664be66f9e3f203fc7d747d899 + React-NativeModulesApple: 452b86b29fae99ed0a4015dca3ad9cd222f88abf React-oscompat: 
ef5df1c734f19b8003e149317d041b8ce1f7d29c - React-perflogger: 9a151e0b4c933c9205fd648c246506a83f31395d - React-performancetimeline: 5b0dfc0acba29ea0269ddb34cd6dd59d3b8a1c66 + React-perflogger: 6fd2f6811533e9c19a61e855c3033eecbf4ad2a0 + React-performancetimeline: abf31259d794c9274b3ea19c5016186925eec6c4 React-RCTActionSheet: a499b0d6d9793886b67ba3e16046a3fef2cdbbc3 - React-RCTAnimation: cc64adc259aabc3354b73065e2231d796dfce576 - React-RCTAppDelegate: 9d523da768f1c9e84c5f3b7e3624d097dfb0e16b - React-RCTBlob: e727f53eeefded7e6432eb76bd22b57bc880e5d1 - React-RCTFabric: 58590aa4fdb4ad546c06a7449b486cf6844e991f - React-RCTFBReactNativeSpec: 9064c63d99e467a3893e328ba3612745c3c3a338 - React-RCTImage: 7159cbdbb18a09d97ba1a611416eced75b3ccb29 - React-RCTLinking: 46293afdb859bccc63e1d3dedc6901a3c04ef360 - React-RCTNetwork: 4a6cd18f5bcd0363657789c64043123a896b1170 - React-RCTRuntime: 5ab904fd749aa52f267ef771d265612582a17880 - React-RCTSettings: 61e361dc85136d1cb0e148b7541993d2ee950ea7 - React-RCTText: abd1e196c3167175e6baef18199c6d9d8ac54b4e - React-RCTVibration: 490e0dcb01a3fe4a0dfb7bc51ad5856d8b84f343 + React-RCTAnimation: 2595dcb10a82216a511b54742f8c28d793852ac6 + React-RCTAppDelegate: f03604b70f57c9469a84a159d8abecf793a5bcff + React-RCTBlob: e00f9b4e2f151938f4d9864cf33ebf24ac03328a + React-RCTFabric: 3945d116fd271598db262d4e6ed5691d431ed9e8 + React-RCTFBReactNativeSpec: 0f4d4f0da938101f2ca9d5333a8f46e527ad2819 + React-RCTImage: dac5e9f8ec476aefe6e60ee640ebc1dfaf1a4dbe + React-RCTLinking: 494b785a40d952a1dfbe712f43214376e5f0e408 + React-RCTNetwork: b3d7c30cd21793e268db107dd0980cb61b3c1c44 + React-RCTRuntime: a8ff419d437228e7b8a793b14f9d711e1cbb82af + React-RCTSettings: a060c7e381a3896104761b8eed7e284d95e37df3 + React-RCTText: 4f272b72dbb61f390d8c8274528f9fdbff983806 + React-RCTVibration: 0e5326220719aca12473d703aa46693e3b4ce67a React-rendererconsistency: 351fdbc5c1fe4da24243d939094a80f0e149c7a1 - React-renderercss: 3438814bee838ae7840a633ab085ac81699fd5cf - 
React-rendererdebug: 0ac2b9419ad6f88444f066d4b476180af311fb1e + React-renderercss: d333f2ada83969591100d91ec6b23ca2e17e1507 + React-rendererdebug: 039e5949b72ba63c703de020701e3fd152434c61 React-rncore: 57ed480649bb678d8bdc386d20fee8bf2b0c307c - React-RuntimeApple: 8b7a9788f31548298ba1990620fe06b40de65ad7 - React-RuntimeCore: e03d96fbd57ce69fd9bca8c925942194a5126dbc + React-RuntimeApple: 344a5e1105256000afabaa8df12c3e4cab880340 + React-RuntimeCore: 0e48fb5e5160acc0334c7a723a42d42cef4b58b6 React-runtimeexecutor: d60846710facedd1edb70c08b738119b3ee2c6c2 - React-RuntimeHermes: aab794755d9f6efd249b61f3af4417296904e3ba - React-runtimescheduler: c3cd124fa5db7c37f601ee49ca0d97019acd8788 + React-RuntimeHermes: 064286a03871d932c99738e0f8ef854962ab4b99 + React-runtimescheduler: e917ab17ae08c204af1ebf8f669b7e411b0220c8 React-timing: a90f4654cbda9c628614f9bee68967f1768bd6a5 - React-utils: a612d50555b6f0f90c74b7d79954019ad47f5de6 - ReactAppDependencyProvider: 04d5eb15eb46be6720e17a4a7fa92940a776e584 - ReactCodegen: 7ea266ccd94436294f516247db7402b57b1214af - ReactCommon: 76d2dc87136d0a667678668b86f0fca0c16fdeb0 - RNAudioAPI: 2e3fd4bf75aa5717791babb30126707504996f09 - RNDeviceInfo: d863506092aef7e7af3a1c350c913d867d795047 - RNGestureHandler: 7d0931a61d7ba0259f32db0ba7d0963c3ed15d2b - RNLiveAudioStream: 93ac2bb6065be9018d0b00157b220f11cebc1513 - RNReanimated: afd6a269a47d6f13ba295c46c6c0e14e3cbd0d8a - RNScreens: 482e9707f9826230810c92e765751af53826d509 - RNSVG: 794f269526df9ddc1f79b3d1a202b619df0368e3 + React-utils: 51c4e71608b8133fecc9a15801d244ae7bdf3758 + ReactAppDependencyProvider: d5dcc564f129632276bd3184e60f053fcd574d6b + ReactCodegen: c9a256facbe4996140f3fb95c7f03ba61c12acc9 + ReactCommon: 4d0da92a5eb8da86c08e3ec34bd23ab439fb2461 + RNAudioAPI: f93e51adeee0911c8c6629a56f6df35edc60c084 + RNDeviceInfo: feea80a690d2bde1fe51461cf548039258bd03f2 + RNGestureHandler: ccf4105b125002bd88e39d2a1f2b7e6001bcdf34 + RNLiveAudioStream: 02584d52711b6b9f268cb371a4b1bdd76ab3e079 + 
RNReanimated: c567de23384730756bb19ff55490819980536b09 + RNScreens: c2e3cc506212228c607b4785b315205e28acbf0f + RNSVG: ee32efbed652c5151fd3f98bed13c68af285bc38 SocketRocket: d4aabe649be1e368d1318fdf28a022d714d65748 - sqlite3: 73513155ec6979715d3904ef53a8d68892d4032b - Yoga: c758bfb934100bb4bf9cbaccb52557cee35e8bdf + sqlite3: 1d85290c3321153511f6e900ede7a1608718bbd5 + Yoga: 9f110fc4b7aa538663cba3c14cbb1c335f43c13f PODFILE CHECKSUM: bba19a069e673f2259009e9d2caab44374fdebcf diff --git a/apps/llm/ios/llm.xcodeproj/project.pbxproj b/apps/llm/ios/llm.xcodeproj/project.pbxproj index 4b52e71d52..c86af0f8ab 100644 --- a/apps/llm/ios/llm.xcodeproj/project.pbxproj +++ b/apps/llm/ios/llm.xcodeproj/project.pbxproj @@ -26,14 +26,14 @@ 63C842393C3838DA2ECEFC7C /* Pods-llm.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-llm.release.xcconfig"; path = "Target Support Files/Pods-llm/Pods-llm.release.xcconfig"; sourceTree = ""; }; 8CD8BF58A368F789F1E7DF50 /* ExpoModulesProvider.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; name = ExpoModulesProvider.swift; path = "Pods/Target Support Files/Pods-llm/ExpoModulesProvider.swift"; sourceTree = ""; }; AA286B85B6C04FC6940260E9 /* SplashScreen.storyboard */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.storyboard; name = SplashScreen.storyboard; path = llm/SplashScreen.storyboard; sourceTree = ""; }; - B79E360E00239D910BF9B38D /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xml; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = ""; }; + B79E360E00239D910BF9B38D /* PrivacyInfo.xcprivacy */ = {isa = PBXFileReference; includeInIndex = 1; name = PrivacyInfo.xcprivacy; path = llm/PrivacyInfo.xcprivacy; sourceTree = ""; }; BB2F792C24A3F905000567C9 /* Expo.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
text.plist.xml; path = Expo.plist; sourceTree = ""; }; - E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = ""; }; + E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Medium.otf"; path = "../assets/fonts/Aeonik-Medium.otf"; sourceTree = ""; }; ED297162215061F000B7C4FE /* JavaScriptCore.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = JavaScriptCore.framework; path = System/Library/Frameworks/JavaScriptCore.framework; sourceTree = SDKROOT; }; F11748412D0307B40044C1D9 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = AppDelegate.swift; path = llm/AppDelegate.swift; sourceTree = ""; }; F11748442D0722820044C1D9 /* llm-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "llm-Bridging-Header.h"; path = "llm/llm-Bridging-Header.h"; sourceTree = ""; }; F5CE0775ADE5923FA417B603 /* libPods-llm.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libPods-llm.a"; sourceTree = BUILT_PRODUCTS_DIR; }; - F866B7979FB94C8797EE2E3D /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = 9; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = ""; }; + F866B7979FB94C8797EE2E3D /* Aeonik-Regular.otf */ = {isa = PBXFileReference; explicitFileType = undefined; fileEncoding = undefined; includeInIndex = 0; lastKnownFileType = unknown; name = "Aeonik-Regular.otf"; path = "../assets/fonts/Aeonik-Regular.otf"; sourceTree = ""; }; /* End PBXFileReference section */ /* 
Begin PBXFrameworksBuildPhase section */ @@ -93,6 +93,7 @@ 4F489A14802F01369BFDDEFD /* Pods-llm.debug.xcconfig */, 63C842393C3838DA2ECEFC7C /* Pods-llm.release.xcconfig */, ); + name = Pods; path = Pods; sourceTree = ""; }; @@ -134,6 +135,7 @@ E8C01EF33FCE4105BBBC9DF6 /* Aeonik-Medium.otf */, ); name = Resources; + path = ""; sourceTree = ""; }; BB2F792B24A3F905000567C9 /* Supporting */ = { @@ -353,7 +355,6 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = ""; ENABLE_BITCODE = NO; GCC_PREPROCESSOR_DEFINITIONS = ( "$(inherited)", @@ -390,7 +391,6 @@ CLANG_ENABLE_MODULES = YES; CODE_SIGN_ENTITLEMENTS = llm/llm.entitlements; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = llm/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 15.1; LD_RUNPATH_SEARCH_PATHS = ( @@ -468,7 +468,10 @@ LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift\"$(inherited)\""; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; - OTHER_LDFLAGS = "$(inherited) "; + OTHER_LDFLAGS = ( + "$(inherited)", + " ", + ); REACT_NATIVE_PATH = "${PODS_ROOT}/../../../../node_modules/react-native"; SDKROOT = iphoneos; SWIFT_ACTIVE_COMPILATION_CONDITIONS = "$(inherited) DEBUG"; @@ -523,7 +526,10 @@ ); LIBRARY_SEARCH_PATHS = "$(SDKROOT)/usr/lib/swift\"$(inherited)\""; MTL_ENABLE_DEBUG_INFO = NO; - OTHER_LDFLAGS = "$(inherited) "; + OTHER_LDFLAGS = ( + "$(inherited)", + " ", + ); REACT_NATIVE_PATH = "${PODS_ROOT}/../../../../node_modules/react-native"; SDKROOT = iphoneos; USE_HERMES = true; diff --git a/apps/llm/ios/llm/llm.entitlements b/apps/llm/ios/llm/llm.entitlements index 0c67376eba..8f5046f7d4 100644 --- a/apps/llm/ios/llm/llm.entitlements +++ b/apps/llm/ios/llm/llm.entitlements @@ -1,5 +1,10 @@ - + + com.apple.developer.kernel.increased-debugging-memory-limit + + com.apple.developer.kernel.increased-memory-limit + + diff --git a/packages/react-native-executorch/android/CMakeLists.txt 
b/packages/react-native-executorch/android/CMakeLists.txt index 96164c49c6..d35311d7a4 100644 --- a/packages/react-native-executorch/android/CMakeLists.txt +++ b/packages/react-native-executorch/android/CMakeLists.txt @@ -14,4 +14,28 @@ set(COMMON_CPP_DIR "${CMAKE_SOURCE_DIR}/../common") set(LIBS_DIR "${CMAKE_SOURCE_DIR}/../third-party/android/libs") set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../third-party/include") +# FIXME: Below you can see our (so far unsuccessful) attempts at linking tokenizers-cpp +# directly into react-native-executorch instead of it being linked against ExecuTorch +# and then transitively to our library. Please go back to this when we bump ET runtime to the next version. +# The problem with directly linking tokenizers-cpp using a submodule is that we get unresolved symbols for +# some android logging libraries, which are referenced by sentencepiece. + +# set(TOKENIZERS_CPP_DIR "${CMAKE_SOURCE_DIR}/../../../third-party/tokenizers-cpp") +# add_subdirectory("${TOKENIZERS_CPP_DIR}" tokenizers-cpp) + +# # Link Android log library to sentencepiece targets +# if(TARGET sentencepiece-static) +# target_link_libraries(sentencepiece-static INTERFACE log) +# endif() +# if(TARGET sentencepiece_train-static) +# target_link_libraries(sentencepiece_train-static INTERFACE log) +# endif() + +# # Link log library to sentencepiece executables +# foreach(exe spm_encode spm_decode spm_normalize spm_train spm_export_vocab) +# if(TARGET ${exe}) +# target_link_libraries(${exe} log) +# endif() +# endforeach() + add_subdirectory("${ANDROID_CPP_DIR}") \ No newline at end of file diff --git a/packages/react-native-executorch/android/build.gradle b/packages/react-native-executorch/android/build.gradle index 0e54c69ec0..10fd2323f1 100644 --- a/packages/react-native-executorch/android/build.gradle +++ b/packages/react-native-executorch/android/build.gradle @@ -168,5 +168,6 @@ dependencies { implementation 'com.facebook.fbjni:fbjni:0.6.0' implementation 
"org.jetbrains.kotlin:kotlin-stdlib:$kotlin_version" implementation files('libs/classes.jar') + implementation 'org.opencv:opencv:4.10.0' implementation("com.squareup.okhttp3:okhttp:4.9.2") } diff --git a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt index 11b30acdc6..bf1544aeb4 100644 --- a/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt +++ b/packages/react-native-executorch/android/src/main/cpp/CMakeLists.txt @@ -33,6 +33,7 @@ set(RN_VERSION_LINK_LIBRARIES ) # Dependencies: + # ------- Executorch ------- add_library(executorch SHARED IMPORTED) @@ -40,6 +41,29 @@ add_library(executorch SHARED IMPORTED) set_target_properties(executorch PROPERTIES IMPORTED_LOCATION "${LIBS_DIR}/executorch/${ANDROID_ABI}/libexecutorch.so") + +if(ANDROID_ABI STREQUAL "arm64-v8a") + target_compile_definitions(react-native-executorch PRIVATE ARCH_ARM64) + + # ------- pthreadpool ------- + add_library(pthreadpool SHARED IMPORTED) + + set_target_properties(pthreadpool PROPERTIES + IMPORTED_LOCATION "${LIBS_DIR}/pthreadpool/${ANDROID_ABI}/libpthreadpool.so" + INTERFACE_INCLUDE_DIRECTORIES "${LIBS_DIR}/../../include/pthreadpool/") + + # ------- cpuinfo ------- + add_library(cpuinfo SHARED IMPORTED) + + set_target_properties(cpuinfo PROPERTIES + IMPORTED_LOCATION "${LIBS_DIR}/cpuinfo/${ANDROID_ABI}/libcpuinfo.so" + INTERFACE_INCLUDE_DIRECTORIES "${LIBS_DIR}/../../include/cpuinfo/") + set(EXECUTORCH_LIBS + "pthreadpool" + "cpuinfo" + ) +endif() + # ------- OpenCV ------- set(OPENCV_LIBS @@ -70,4 +94,5 @@ target_link_libraries( ${OPENCV_LIBS} ${OPENCV_THIRD_PARTY_LIBS} executorch + ${EXECUTORCH_LIBS} ) \ No newline at end of file diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt deleted file mode 100644 index 04205ddcca..0000000000 --- 
a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/LLM.kt +++ /dev/null @@ -1,63 +0,0 @@ -package com.swmansion.rnexecutorch - -import android.util.Log -import com.facebook.react.bridge.Promise -import com.facebook.react.bridge.ReactApplicationContext -import org.pytorch.executorch.extension.llm.LlmCallback -import org.pytorch.executorch.extension.llm.LlmModule - -class LLM( - reactContext: ReactApplicationContext, -) : NativeLLMSpec(reactContext), - LlmCallback { - private var llmModule: LlmModule? = null - - override fun getName(): String = NAME - - override fun initialize() { - super.initialize() - } - - override fun onResult(result: String) { - emitOnToken(result) - } - - override fun onStats(tps: Float) { - Log.d("rn_executorch", "TPS: $tps") - } - - override fun loadLLM( - modelSource: String, - tokenizerSource: String, - promise: Promise, - ) { - try { - llmModule = LlmModule(modelSource, tokenizerSource, 0.7f) - promise.resolve("Model loaded successfully") - } catch (e: Exception) { - promise.reject("Model loading failed", e.message) - } - } - - override fun forward( - input: String, - promise: Promise, - ) { - Thread { - llmModule!!.generate(input, this) - promise.resolve("Inference completed successfully") - }.start() - } - - override fun interrupt() { - llmModule!!.stop() - } - - override fun releaseResources() { - llmModule = null - } - - companion object { - const val NAME = "LLM" - } -} diff --git a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt index 98a1fa1d38..0b15e216a5 100644 --- a/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +++ b/packages/react-native-executorch/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt @@ -14,9 +14,7 @@ class RnExecutorchPackage : 
TurboReactPackage() { name: String, reactContext: ReactApplicationContext, ): NativeModule? = - if (name == LLM.NAME) { - LLM(reactContext) - } else if (name == ETInstaller.NAME) { + if (name == ETInstaller.NAME) { ETInstaller(reactContext) } else { null @@ -25,16 +23,6 @@ class RnExecutorchPackage : TurboReactPackage() { override fun getReactModuleInfoProvider(): ReactModuleInfoProvider = ReactModuleInfoProvider { val moduleInfos: MutableMap = HashMap() - moduleInfos[LLM.NAME] = - ReactModuleInfo( - LLM.NAME, - LLM.NAME, - false, // canOverrideExistingModule - false, // needsEagerInit - true, // hasConstants - false, // isCxxModule - true, - ) moduleInfos[ETInstaller.NAME] = ReactModuleInfo( ETInstaller.NAME, diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp index 31b4691cc8..0ac90972df 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.cpp @@ -6,12 +6,20 @@ #include #include #include +#include #include #include #include #include #include +#if defined(__ANDROID__) && defined(__aarch64__) +#include +#include +#include +#include +#endif + namespace rnexecutorch { // This function fetches data from a url address. 
It is implemented in @@ -58,14 +66,15 @@ void RnExecutorchInstaller::injectJSIBindings( *jsiRuntime, "loadImageEmbeddings", RnExecutorchInstaller::loadModel( jsiRuntime, jsCallInvoker, "loadImageEmbeddings")); + jsiRuntime->global().setProperty( *jsiRuntime, "loadTextEmbeddings", RnExecutorchInstaller::loadModel( jsiRuntime, jsCallInvoker, "loadTextEmbeddings")); - jsiRuntime->global().setProperty( - *jsiRuntime, "loadSpeechToText", - RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, - "loadSpeechToText")); + + jsiRuntime->global().setProperty(*jsiRuntime, "loadLLM", + RnExecutorchInstaller::loadModel( + jsiRuntime, jsCallInvoker, "loadLLM")); jsiRuntime->global().setProperty(*jsiRuntime, "loadOCR", RnExecutorchInstaller::loadModel( @@ -74,5 +83,29 @@ *jsiRuntime, "loadVerticalOCR", RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, "loadVerticalOCR")); + + jsiRuntime->global().setProperty( + *jsiRuntime, "loadSpeechToText", + RnExecutorchInstaller::loadModel(jsiRuntime, jsCallInvoker, + "loadSpeechToText")); + +#if defined(__ANDROID__) && defined(__aarch64__) + auto num_of_perf_cores = + ::executorch::extension::cpuinfo::get_num_performant_cores(); + log(LOG_LEVEL::Info, + std::format("Detected {} performant cores", num_of_perf_cores)); + // setting num_of_cores to floor(num_of_perf_cores / 2) + 1 because, depending + // on cpu arch, we want to leave at least 2 performant cores for other tasks + // when possible (setting more actually results in a drop in performance). For + // older devices (e.g. Samsung S22) this resolves to 3 cores, and for newer + // ones (like OnePlus 12) it resolves to 4, which when benchmarked gives the + // highest throughput. 
+ auto num_of_cores = static_cast(num_of_perf_cores / 2) + 1; + ::executorch::extension::threadpool::get_threadpool() + ->_unsafe_reset_threadpool(num_of_cores); + log(LOG_LEVEL::Info, + std::format("Configuring xnnpack for {} threads", num_of_cores)); +#endif } + } // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index a4b923003f..cd24787e3f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -34,6 +34,8 @@ REGISTER_CONSTRUCTOR(ImageEmbeddings, std::string, std::shared_ptr); REGISTER_CONSTRUCTOR(TextEmbeddings, std::string, std::string, std::shared_ptr); +REGISTER_CONSTRUCTOR(LLM, std::string, std::string, + std::shared_ptr); REGISTER_CONSTRUCTOR(SpeechToText, std::string, std::string, std::string, std::shared_ptr); REGISTER_CONSTRUCTOR(OCR, std::string, std::string, std::string, std::string, diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 029ccef47d..4fd3da5e69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -45,6 +45,14 @@ inline std::string getValue(const jsi::Value &val, return val.getString(runtime).utf8(runtime); } +template <> +inline std::shared_ptr +getValue>(const jsi::Value &val, + jsi::Runtime &runtime) { + return std::make_shared( + val.asObject(runtime).asFunction(runtime)); +} + template <> inline std::vector getValue>(const jsi::Value &val, jsi::Runtime &runtime) { diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h 
b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 15d8698cd3..eb4e426149 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -1,6 +1,11 @@ #pragma once #include +#include +#include +#include +#include + #include #include #include @@ -10,12 +15,9 @@ #include #include #include +#include #include #include -#include -#include -#include -#include namespace rnexecutorch { @@ -77,6 +79,20 @@ template class ModelHostObject : public JsiHostObject { promiseHostFunction<&Model::tokenToId>, "tokenToId")); } + + if constexpr (meta::SameAs) { + addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generate>, + "generate")); + + addFunctions(JSI_EXPORT_FUNCTION( + ModelHostObject, synchronousHostFunction<&Model::interrupt>, + "interrupt")); + + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, unload, "unload")); + } + if constexpr (meta::SameAs) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, unload, "unload")); @@ -129,6 +145,9 @@ template class ModelHostObject : public JsiHostObject { } } + // A generic host function that resolves a promise with a result of a + // function. JSI arguments are converted to the types provided in the function + // signature, and the return value is converted back to JSI before resolving. 
template JSI_HOST_FUNCTION(promiseHostFunction) { auto promise = Promise::createPromise( runtime, callInvoker, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp new file mode 100644 index 0000000000..7767719e1f --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp @@ -0,0 +1,58 @@ +#include "LLM.h" + +#include +#include + +namespace rnexecutorch { +using namespace facebook; +using executorch::extension::TensorPtr; +using executorch::runtime::Error; + +LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource, + std::shared_ptr callInvoker) + : runner(std::make_unique(modelSource, tokenizerSource)), + callInvoker(callInvoker) { + + auto loadResult = runner->load(); + if (loadResult != Error::Ok) { + throw std::runtime_error("Failed to load LLM runner, error code: " + + std::to_string(static_cast(loadResult))); + } + memorySizeLowerBound = + std::filesystem::file_size(std::filesystem::path(modelSource)) + + std::filesystem::file_size(std::filesystem::path(tokenizerSource)); +} + +void LLM::generate(std::string input, std::shared_ptr callback) { + if (!runner || !runner->is_loaded()) { + throw std::runtime_error("Runner is not loaded"); + } + + // Create a native callback that will invoke the JS callback on the JS thread + auto nativeCallback = [this, callback](const std::string &token) { + callInvoker->invokeAsync([callback, token](jsi::Runtime &runtime) { + callback->call(runtime, jsi::String::createFromUtf8(runtime, token)); + }); + }; + + auto error = runner->generate(input, nativeCallback, {}, false); + if (error != executorch::runtime::Error::Ok) { + throw std::runtime_error("Failed to generate text, error code: " + + std::to_string(static_cast(error))); + } +} + +void LLM::interrupt() { + if (!runner || !runner->is_loaded()) { + throw std::runtime_error("Can't interrupt a model that's not loaded!"); 
+ } + runner->stop(); +} + +std::size_t LLM::getMemoryLowerBound() const noexcept { + return memorySizeLowerBound; +} + +void LLM::unload() noexcept { runner.reset(nullptr); } + +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h new file mode 100644 index 0000000000..f946409d95 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace rnexecutorch { +using namespace facebook; + +class LLM { +public: + explicit LLM(const std::string &modelSource, + const std::string &tokenizerSource, + std::shared_ptr callInvoker); + + void generate(std::string input, std::shared_ptr callback); + void interrupt(); + void unload() noexcept; + std::size_t getMemoryLowerBound() const noexcept; + +private: + size_t memorySizeLowerBound; + std::unique_ptr runner; + std::shared_ptr callInvoker; +}; +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h b/packages/react-native-executorch/common/runner/irunner.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/irunner.h rename to packages/react-native-executorch/common/runner/irunner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp b/packages/react-native-executorch/common/runner/runner.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.cpp rename to packages/react-native-executorch/common/runner/runner.cpp diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h b/packages/react-native-executorch/common/runner/runner.h similarity index 100% rename 
from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/runner.h rename to packages/react-native-executorch/common/runner/runner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp b/packages/react-native-executorch/common/runner/sampler.cpp similarity index 97% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp rename to packages/react-native-executorch/common/runner/sampler.cpp index 7ba8152889..e156b9f70e 100644 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.cpp +++ b/packages/react-native-executorch/common/runner/sampler.cpp @@ -184,9 +184,10 @@ template int32_t Sampler::sample(T *logits) { } template int32_t Sampler::sample(float *logits); -template int32_t Sampler::sample(exec_aten::Half *logits); template int32_t -Sampler::sample(exec_aten::BFloat16 *logits); +Sampler::sample(executorch::aten::Half *logits); +template int32_t +Sampler::sample(executorch::aten::BFloat16 *logits); } // namespace llm } // namespace extension diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h b/packages/react-native-executorch/common/runner/sampler.h similarity index 91% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h rename to packages/react-native-executorch/common/runner/sampler.h index 0b29ca9fcb..03d3d09a01 100644 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/sampler/sampler.h +++ b/packages/react-native-executorch/common/runner/sampler.h @@ -19,18 +19,19 @@ #endif #include +#include namespace executorch { namespace extension { namespace llm { // A simple llama2 sampler. 
-template struct ProbIndex { +template struct ET_EXPERIMENTAL ProbIndex { T prob; int32_t index; }; // struct used when sorting probabilities during top-p sampling -class Sampler { +class ET_EXPERIMENTAL Sampler { public: Sampler(int32_t vocab_size, float temperature, float topp, unsigned long long rng_seed); diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h b/packages/react-native-executorch/common/runner/stats.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/stats.h rename to packages/react-native-executorch/common/runner/stats.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp b/packages/react-native-executorch/common/runner/text_decoder_runner.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.cpp rename to packages/react-native-executorch/common/runner/text_decoder_runner.cpp diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h b/packages/react-native-executorch/common/runner/text_decoder_runner.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_decoder_runner.h rename to packages/react-native-executorch/common/runner/text_decoder_runner.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp b/packages/react-native-executorch/common/runner/text_prefiller.cpp similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.cpp rename to packages/react-native-executorch/common/runner/text_prefiller.cpp diff --git 
a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h b/packages/react-native-executorch/common/runner/text_prefiller.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_prefiller.h rename to packages/react-native-executorch/common/runner/text_prefiller.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h b/packages/react-native-executorch/common/runner/text_token_generator.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/text_token_generator.h rename to packages/react-native-executorch/common/runner/text_token_generator.h diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h b/packages/react-native-executorch/common/runner/util.h similarity index 100% rename from packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/runner/util.h rename to packages/react-native-executorch/common/runner/util.h diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib index cc7da01f7d..3acc9408d9 100755 Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h 
deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString *)token; - -@end diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Headers/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib index 71f3ae6809..43df0d606e 100755 Binary files a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib and b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib differ diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString 
*)token; - -@end diff --git a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h b/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Headers/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.h b/packages/react-native-executorch/ios/RnExecutorch/LLM.h deleted file mode 100644 index 5047919a48..0000000000 --- a/packages/react-native-executorch/ios/RnExecutorch/LLM.h +++ /dev/null @@ -1,5 +0,0 @@ -#import - -@interface LLM : NativeLLMSpecBase - -@end diff --git a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm b/packages/react-native-executorch/ios/RnExecutorch/LLM.mm deleted file mode 100644 index 33971f755c..0000000000 --- a/packages/react-native-executorch/ios/RnExecutorch/LLM.mm +++ /dev/null @@ -1,78 +0,0 @@ -#import "LLM.h" -#import - -@implementation LLM { - LLaMARunner *runner; -} - -- (instancetype)init { - self = 
[super init]; - - return self; -} - -RCT_EXPORT_MODULE() - -- (void)onResult:(NSString *)token prompt:(NSString *)prompt { - if ([token isEqualToString:prompt]) { - return; - } - - dispatch_async(dispatch_get_main_queue(), ^{ - [self emitOnToken:token]; - }); -} - -- (void)loadLLM:(NSString *)modelSource - tokenizerSource:(NSString *)tokenizerSource - resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { - @try { - self->runner = [[LLaMARunner alloc] initWithModelPath:modelSource - tokenizerPath:tokenizerSource]; - - resolve(@"Model and tokenizer loaded successfully"); - return; - } @catch (NSException *exception) { - [self releaseResources]; - reject(@"Model or tokenizer loading failed", exception.reason, nil); - return; - } -} - -- (void)forward:(NSString *)input - resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { - - dispatch_async( - dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - NSError *error = nil; - [self->runner generate:input - withTokenCallback:^(NSString *token) { - [self onResult:token prompt:input]; - } - error:&error]; - - if (error) { - reject(@"error_in_generation", error.localizedDescription, nil); - return; - } - resolve(@"Inference completed successfully"); - return; - }); -} - -- (void)interrupt { - [self->runner stop]; -} - -- (void)releaseResources { - self->runner = nil; -} - -- (std::shared_ptr)getTurboModule: - (const facebook::react::ObjCTurboModule::InitParams &)params { - return std::make_shared(params); -} - -@end diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-ios-release.a diff --git 
a/packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_coreml_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_coreml_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_ios.a 
b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_mps_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_mps_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libbackend_xnnpack_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_ios.a diff --git a/packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a similarity index 100% rename from 
packages/react-native-executorch/ios/libs/libbackend_xnnpack_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libbackend_xnnpack_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_ios.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_ios.a diff --git a/packages/react-native-executorch/ios/libs/libexecutorch_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libexecutorch_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libexecutorch_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-ios-release.a diff --git 
a/packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_custom_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_custom_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a 
b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_optimized_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_optimized_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_portable_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a similarity index 100% 
rename from packages/react-native-executorch/ios/libs/libkernels_portable_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_portable_simulator.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized-ios-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-ios-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized-simulator-release.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized-simulator-release.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized_ios.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_ios.a diff --git a/packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a b/packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a similarity index 100% rename from packages/react-native-executorch/ios/libs/libkernels_quantized_simulator.a rename to packages/react-native-executorch/ios/libs/executorch/libkernels_quantized_simulator.a diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a new file mode 100644 index 0000000000..69cc738f00 Binary files 
/dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a new file mode 100644 index 0000000000..86280b1c5c Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a new file mode 100644 index 0000000000..6c99b48d6f Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a new file mode 100644 index 0000000000..f3aa9203d9 Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a new file mode 100644 index 0000000000..a24e87cedf Binary files /dev/null and b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a differ diff --git a/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a new file mode 100644 index 0000000000..338db91c8b Binary files /dev/null and 
b/packages/react-native-executorch/ios/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a differ diff --git a/packages/react-native-executorch/package.json b/packages/react-native-executorch/package.json index f47f62f51d..fc83fd0df4 100644 --- a/packages/react-native-executorch/package.json +++ b/packages/react-native-executorch/package.json @@ -12,7 +12,6 @@ "lib", "android", "ios", - "!ios/libs", "cpp", "common", "*.podspec", diff --git a/packages/react-native-executorch/react-native-executorch.podspec b/packages/react-native-executorch/react-native-executorch.podspec index 67b03a8552..400b0883f0 100644 --- a/packages/react-native-executorch/react-native-executorch.podspec +++ b/packages/react-native-executorch/react-native-executorch.podspec @@ -13,52 +13,56 @@ Pod::Spec.new do |s| s.platforms = { :ios => min_ios_version_supported } s.source = { :git => "https://github.com/software-mansion/react-native-executorch.git", :tag => "#{s.version}" } - et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs', __dir__) + et_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/executorch', __dir__) + tokenizers_binaries_path = File.expand_path('$(PODS_TARGET_SRCROOT)/ios/libs/tokenizers-cpp', __dir__) s.user_target_xcconfig = { "HEADER_SEARCH_PATHS" => "$(PODS_TARGET_SRCROOT)/third-party/include", - # FIXME: The code below links the static libraries built from ExecuTorch against out library. - # Please uncomment it once the ExecuTorchLib is no longer required. 
- - # "OTHER_LDFLAGS[sdk=iphoneos*][arch=*]" => [ - # '$(inherited)', - # '-framework "CoreML"', - # '-framework "Accelerate"', - # '-framework "Metal"', - # '-framework "MetalPerformanceShaders"', - # '-framework "MetalPerformanceShadersGraph"', - # "-force_load \"#{et_binaries_path}\"/libbackend_coreml_ios.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_mps_ios.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_ios.a", - # "-force_load \"#{et_binaries_path}\"/libexecutorch_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_custom_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_optimized_ios.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a" - # ].join(' '), - - # "OTHER_LDFLAGS[sdk=iphonesimulator*][arch=*]" => [ - # '$(inherited)', - # '-framework "CoreML"', - # '-framework "Accelerate"', - # '-framework "Metal"', - # '-framework "MetalPerformanceShaders"', - # '-framework "MetalPerformanceShadersGraph"', - # "-force_load \"#{et_binaries_path}\"/libbackend_coreml_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_mps_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libexecutorch_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_custom_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_optimized_simulator.a", - # "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a" - # ].join(' '), + "OTHER_LDFLAGS[sdk=iphoneos*][arch=*]" => [ + '$(inherited)', + '-framework "CoreML"', + '-framework "Accelerate"', + '-framework "Metal"', + '-framework "MetalPerformanceShaders"', + '-framework "MetalPerformanceShadersGraph"', + "-force_load \"#{et_binaries_path}\"/libbackend_coreml_ios.a", + "-force_load \"#{et_binaries_path}\"/libbackend_mps_ios.a", + "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_ios.a", + "-force_load \"#{et_binaries_path}\"/libexecutorch_ios.a", + 
"-force_load \"#{et_binaries_path}\"/libkernels_custom_ios.a", + "-force_load \"#{et_binaries_path}\"/libkernels_optimized_ios.a", + "-force_load \"#{et_binaries_path}\"/libkernels_quantized_ios.a", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_cpp.a\"", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libsentencepiece.a\"", + "\"#{tokenizers_binaries_path}/physical-arm64-release/libtokenizers_c.a\"" + ].join(' '), + + "OTHER_LDFLAGS[sdk=iphonesimulator*][arch=*]" => [ + '$(inherited)', + '-framework "CoreML"', + '-framework "Accelerate"', + '-framework "Metal"', + '-framework "MetalPerformanceShaders"', + '-framework "MetalPerformanceShadersGraph"', + "-force_load \"#{et_binaries_path}\"/libbackend_coreml_simulator.a", + "-force_load \"#{et_binaries_path}\"/libbackend_mps_simulator.a", + "-force_load \"#{et_binaries_path}\"/libbackend_xnnpack_simulator.a", + "-force_load \"#{et_binaries_path}\"/libexecutorch_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_custom_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_optimized_simulator.a", + "-force_load \"#{et_binaries_path}\"/libkernels_quantized_simulator.a", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_cpp.a\"", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libsentencepiece.a\"", + "\"#{tokenizers_binaries_path}/simulator-arm64-debug/libtokenizers_c.a\"" + ].join(' '), 'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'x86_64', } s.pod_target_xcconfig = { "USE_HEADERMAP" => "YES", - "HEADER_SEARCH_PATHS" => + "HEADER_SEARCH_PATHS" => '"$(PODS_TARGET_SRCROOT)/ios" '+ '"$(PODS_TARGET_SRCROOT)/third-party/include" '+ '"$(PODS_TARGET_SRCROOT)/common" ', @@ -73,11 +77,11 @@ Pod::Spec.new do |s| ] # Exclude file with tests to not introduce gtest dependency. - # Do not include the headers from common/rnexecutorch/jsi/ as source files. 
- # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if - # another pod includes a header with the same name without a path by - # #include "Header.h" we get a conflict. Here, headers in jsi/ collide with - # react-native-skia. The headers are preserved by preserve_paths and + # Do not include the headers from common/rnexecutorch/jsi/ as source files. + # Xcode/Cocoapods leaks them to other pods that an app also depends on, so if + # another pod includes a header with the same name without a path by + # #include "Header.h" we get a conflict. Here, headers in jsi/ collide with + # react-native-skia. The headers are preserved by preserve_paths and # then made available by HEADER_SEARCH_PATHS. s.exclude_files = [ "common/rnexecutorch/tests/*.{cpp}", @@ -86,9 +90,9 @@ Pod::Spec.new do |s| s.header_mappings_dir = "common/rnexecutorch" s.header_dir = "rnexecutorch" s.preserve_paths = "common/rnexecutorch/jsi/*.{h,hpp}" - + s.dependency "opencv-rne", "~> 4.11.0" s.dependency "sqlite3" install_modules_dependencies(s) -end +end \ No newline at end of file diff --git a/packages/react-native-executorch/src/controllers/LLMController.ts b/packages/react-native-executorch/src/controllers/LLMController.ts index 5bc7852350..a1d2fb1036 100644 --- a/packages/react-native-executorch/src/controllers/LLMController.ts +++ b/packages/react-native-executorch/src/controllers/LLMController.ts @@ -1,4 +1,3 @@ -import { EventSubscription } from 'react-native'; import { ResourceSource } from '../types/common'; import { ResourceFetcher } from '../utils/ResourceFetcher'; import { ETError, getError } from '../Error'; @@ -12,16 +11,15 @@ import { SPECIAL_TOKENS, ToolsConfig, } from '../types/llm'; -import { LLMNativeModule } from '../native/RnExecutorchModules'; import { parseToolCall } from '../utils/llm'; import { Logger } from '../common/Logger'; export class LLMController { - private nativeModule: typeof LLMNativeModule; + private nativeModule: any; private chatConfig: 
ChatConfig = DEFAULT_CHAT_CONFIG; private toolsConfig: ToolsConfig | undefined; private tokenizerConfig: any; - private onToken: EventSubscription | null = null; + private onToken?: (token: string) => void; private _response = ''; private _isReady = false; private _isGenerating = false; @@ -71,7 +69,6 @@ export class LLMController { this._isGenerating = isGenerating; isGeneratingCallback?.(isGenerating); }; - this.nativeModule = LLMNativeModule; } public get response() { @@ -132,10 +129,9 @@ export class LLMController { this.tokenizerConfig = JSON.parse( await readAsStringAsync('file://' + tokenizerConfigPath!) ); - - await this.nativeModule.loadLLM(modelPath, tokenizerPath); + this.nativeModule = global.loadLLM(modelPath, tokenizerPath); this.isReadyCallback(true); - this.onToken = this.nativeModule.onToken((data: string) => { + this.onToken = (data: string) => { if ( !data || (SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig && @@ -148,7 +144,7 @@ export class LLMController { this.tokenCallback(data); this.responseCallback(this._response + data); - }); + }; } catch (e) { this.isReadyCallback(false); throw new Error(getError(e)); @@ -182,9 +178,8 @@ export class LLMController { 'You cannot delete the model now. You need to interrupt first.' 
); } - this.onToken?.remove(); - this.onToken = null; - this.nativeModule.releaseResources(); + this.onToken = () => {}; + this.nativeModule.unload(); this.isReadyCallback(false); this.isGeneratingCallback(false); } @@ -199,7 +194,7 @@ export class LLMController { try { this.responseCallback(''); this.isGeneratingCallback(true); - await this.nativeModule.forward(input); + await this.nativeModule.generate(input, this.onToken); } catch (e) { throw new Error(getError(e)); } finally { diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index 4a3cbe8c1e..a90d8afb77 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -10,6 +10,7 @@ declare global { var loadTokenizerModule: (source: string) => any; var loadImageEmbeddings: (source: string) => any; var loadTextEmbeddings: (modelSource: string, tokenizerSource: string) => any; + var loadLLM: (modelSource: string, tokenizerSource: string) => any; var loadSpeechToText: ( encoderSource: string, decoderSource: string, @@ -40,10 +41,10 @@ if ( global.loadTokenizerModule == null || global.loadTextEmbeddings == null || global.loadImageEmbeddings == null || + global.loadLLM == null || global.loadSpeechToText == null || global.loadOCR == null || - global.loadVerticalOCR == null || - global.loadImageEmbeddings == null + global.loadVerticalOCR == null ) { if (!ETInstallerNativeModule) { throw new Error( diff --git a/packages/react-native-executorch/src/native/NativeLLM.ts b/packages/react-native-executorch/src/native/NativeLLM.ts deleted file mode 100644 index e89ba01f53..0000000000 --- a/packages/react-native-executorch/src/native/NativeLLM.ts +++ /dev/null @@ -1,14 +0,0 @@ -import type { TurboModule } from 'react-native'; -import { TurboModuleRegistry } from 'react-native'; -import type { EventEmitter } from 'react-native/Libraries/Types/CodegenTypes'; - -export interface Spec extends TurboModule { - 
loadLLM(modelSource: string, tokenizerSource: string): Promise; - forward(input: string): Promise; - interrupt(): void; - releaseResources(): void; - - readonly onToken: EventEmitter; -} - -export default TurboModuleRegistry.get('LLM'); diff --git a/packages/react-native-executorch/src/native/RnExecutorchModules.ts b/packages/react-native-executorch/src/native/RnExecutorchModules.ts index 92368785b7..3cf4a10bbb 100644 --- a/packages/react-native-executorch/src/native/RnExecutorchModules.ts +++ b/packages/react-native-executorch/src/native/RnExecutorchModules.ts @@ -1,5 +1,4 @@ import { Platform } from 'react-native'; -import { Spec as LLMInterface } from './NativeLLM'; import { Spec as ETInstallerInterface } from './NativeETInstaller'; const LINKING_ERROR = @@ -23,8 +22,5 @@ function returnSpecOrThrowLinkingError(spec: any) { const ETInstallerNativeModule: ETInstallerInterface = returnSpecOrThrowLinkingError(require('./NativeETInstaller').default); -const LLMNativeModule: LLMInterface = returnSpecOrThrowLinkingError( - require('./NativeLLM').default -); -export { LLMNativeModule, ETInstallerNativeModule }; +export { ETInstallerNativeModule }; diff --git a/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so b/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so new file mode 100755 index 0000000000..c97092dbce Binary files /dev/null and b/packages/react-native-executorch/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so differ diff --git a/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so b/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so new file mode 100755 index 0000000000..5b144aaa8c Binary files /dev/null and b/packages/react-native-executorch/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so differ diff --git 
a/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h b/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h new file mode 100644 index 0000000000..e2e1410c57 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/cpuinfo/cpuinfo.h @@ -0,0 +1,2305 @@ +#pragma once +#ifndef CPUINFO_H +#define CPUINFO_H + +#ifndef __cplusplus +#include +#endif + +#ifdef __APPLE__ +#include +#endif + +#include + +/* Identify architecture and define corresponding macro */ + +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \ + defined(__i686__) || defined(_M_IX86) +#define CPUINFO_ARCH_X86 1 +#endif + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || \ + defined(_M_AMD64) +#define CPUINFO_ARCH_X86_64 1 +#endif + +#if defined(__arm__) || defined(_M_ARM) +#define CPUINFO_ARCH_ARM 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define CPUINFO_ARCH_ARM64 1 +#endif + +#if defined(__PPC64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) +#define CPUINFO_ARCH_PPC64 1 +#endif + +#if defined(__asmjs__) +#define CPUINFO_ARCH_ASMJS 1 +#endif + +#if defined(__wasm__) +#if defined(__wasm_simd128__) +#define CPUINFO_ARCH_WASMSIMD 1 +#else +#define CPUINFO_ARCH_WASM 1 +#endif +#endif + +#if defined(__riscv) +#if (__riscv_xlen == 32) +#define CPUINFO_ARCH_RISCV32 1 +#elif (__riscv_xlen == 64) +#define CPUINFO_ARCH_RISCV64 1 +#endif +#endif + +/* Define other architecture-specific macros as 0 */ + +#ifndef CPUINFO_ARCH_X86 +#define CPUINFO_ARCH_X86 0 +#endif + +#ifndef CPUINFO_ARCH_X86_64 +#define CPUINFO_ARCH_X86_64 0 +#endif + +#ifndef CPUINFO_ARCH_ARM +#define CPUINFO_ARCH_ARM 0 +#endif + +#ifndef CPUINFO_ARCH_ARM64 +#define CPUINFO_ARCH_ARM64 0 +#endif + +#ifndef CPUINFO_ARCH_PPC64 +#define CPUINFO_ARCH_PPC64 0 +#endif + +#ifndef CPUINFO_ARCH_ASMJS +#define CPUINFO_ARCH_ASMJS 0 +#endif + +#ifndef CPUINFO_ARCH_WASM +#define CPUINFO_ARCH_WASM 0 +#endif + +#ifndef CPUINFO_ARCH_WASMSIMD +#define 
CPUINFO_ARCH_WASMSIMD 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV32 +#define CPUINFO_ARCH_RISCV32 0 +#endif + +#ifndef CPUINFO_ARCH_RISCV64 +#define CPUINFO_ARCH_RISCV64 0 +#endif + +#if CPUINFO_ARCH_X86 && defined(_MSC_VER) +#define CPUINFO_ABI __cdecl +#elif CPUINFO_ARCH_X86 && defined(__GNUC__) +#define CPUINFO_ABI __attribute__((__cdecl__)) +#else +#define CPUINFO_ABI +#endif + +#define CPUINFO_CACHE_UNIFIED 0x00000001 +#define CPUINFO_CACHE_INCLUSIVE 0x00000002 +#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 + +struct cpuinfo_cache { + /** Cache size in bytes */ + uint32_t size; + /** Number of ways of associativity */ + uint32_t associativity; + /** Number of sets */ + uint32_t sets; + /** Number of partitions */ + uint32_t partitions; + /** Line size in bytes */ + uint32_t line_size; + /** + * Binary characteristics of the cache (unified cache, inclusive cache, + * cache with complex indexing). + * + * @see CPUINFO_CACHE_UNIFIED, CPUINFO_CACHE_INCLUSIVE, + * CPUINFO_CACHE_COMPLEX_INDEXING + */ + uint32_t flags; + /** Index of the first logical processor that shares this cache */ + uint32_t processor_start; + /** Number of logical processors that share this cache */ + uint32_t processor_count; +}; + +struct cpuinfo_trace_cache { + uint32_t uops; + uint32_t associativity; +}; + +#define CPUINFO_PAGE_SIZE_4KB 0x1000 +#define CPUINFO_PAGE_SIZE_1MB 0x100000 +#define CPUINFO_PAGE_SIZE_2MB 0x200000 +#define CPUINFO_PAGE_SIZE_4MB 0x400000 +#define CPUINFO_PAGE_SIZE_16MB 0x1000000 +#define CPUINFO_PAGE_SIZE_1GB 0x40000000 + +struct cpuinfo_tlb { + uint32_t entries; + uint32_t associativity; + uint64_t pages; +}; + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed + to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor + * microarchitectures. 
+ * + * Sold its ARM design subsidiary in 2006. The last ARM processor design + * was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. Vendor of x86 and x86-64 processor + microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor + microarchitectures. */ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendir if ARM64 processor + microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor + microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC + processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. + */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor + * microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA + * Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor + microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor + * microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. 
+ */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor + * of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + /** SiFive, Inc. Vendor of RISC-V processor microarchitectures. */ + cpuinfo_vendor_sifive = 17, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. + */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. Vendor of ARM processor + * microarchitectures. + */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor + microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor + microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation + * to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was + * released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor + * microarchitectures. + * + * Sold its x86 design subsidiary in 2001. 
The last processor design was + * released in 2001. + */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 + * instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor + * microarchitectures. + * + * Ceased x86 in the early 1990s. The last processor design was released + * in 1991. Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor + * microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released + * in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +/** + * Processor microarchitecture + * + * Processors with different microarchitectures often have different instruction + * performance characteristics, and may have dramatically different pipeline + * organization. + */ +enum cpuinfo_uarch { + /** Microarchitecture is unknown, or the library failed to get + information about the microarchitecture from OS */ + cpuinfo_uarch_unknown = 0, + + /** Pentium and Pentium MMX microarchitecture. */ + cpuinfo_uarch_p5 = 0x00100100, + /** Intel Quark microarchitecture. */ + cpuinfo_uarch_quark = 0x00100101, + + /** Pentium Pro, Pentium II, and Pentium III. */ + cpuinfo_uarch_p6 = 0x00100200, + /** Pentium M. */ + cpuinfo_uarch_dothan = 0x00100201, + /** Intel Core microarchitecture. */ + cpuinfo_uarch_yonah = 0x00100202, + /** Intel Core 2 microarchitecture on 65 nm process. */ + cpuinfo_uarch_conroe = 0x00100203, + /** Intel Core 2 microarchitecture on 45 nm process. */ + cpuinfo_uarch_penryn = 0x00100204, + /** Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st + gen). 
*/ + cpuinfo_uarch_nehalem = 0x00100205, + /** Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ + cpuinfo_uarch_sandy_bridge = 0x00100206, + /** Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). */ + cpuinfo_uarch_ivy_bridge = 0x00100207, + /** Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ + cpuinfo_uarch_haswell = 0x00100208, + /** Intel Broadwell microarchitecture. */ + cpuinfo_uarch_broadwell = 0x00100209, + /** Intel Sky Lake microarchitecture (14 nm, including + Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */ + cpuinfo_uarch_sky_lake = 0x0010020A, + /** DEPRECATED (Intel Kaby Lake microarchitecture). */ + cpuinfo_uarch_kaby_lake = 0x0010020A, + /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */ + cpuinfo_uarch_palm_cove = 0x0010020B, + /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ + cpuinfo_uarch_sunny_cove = 0x0010020C, + + /** Pentium 4 with Willamette, Northwood, or Foster cores. */ + cpuinfo_uarch_willamette = 0x00100300, + /** Pentium 4 with Prescott and later cores. */ + cpuinfo_uarch_prescott = 0x00100301, + + /** Intel Atom on 45 nm process. */ + cpuinfo_uarch_bonnell = 0x00100400, + /** Intel Atom on 32 nm process. */ + cpuinfo_uarch_saltwell = 0x00100401, + /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ + cpuinfo_uarch_silvermont = 0x00100402, + /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ + cpuinfo_uarch_airmont = 0x00100403, + /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */ + cpuinfo_uarch_goldmont = 0x00100404, + /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ + cpuinfo_uarch_goldmont_plus = 0x00100405, + + /** Intel Knights Ferry HPC boards. */ + cpuinfo_uarch_knights_ferry = 0x00100500, + /** Intel Knights Corner HPC boards (aka Xeon Phi). */ + cpuinfo_uarch_knights_corner = 0x00100501, + /** Intel Knights Landing microarchitecture (second-gen MIC). 
*/ + cpuinfo_uarch_knights_landing = 0x00100502, + /** Intel Knights Hill microarchitecture (third-gen MIC). */ + cpuinfo_uarch_knights_hill = 0x00100503, + /** Intel Knights Mill Xeon Phi. */ + cpuinfo_uarch_knights_mill = 0x00100504, + + /** Intel/Marvell XScale series. */ + cpuinfo_uarch_xscale = 0x00100600, + + /** AMD K5. */ + cpuinfo_uarch_k5 = 0x00200100, + /** AMD K6 and alike. */ + cpuinfo_uarch_k6 = 0x00200101, + /** AMD Athlon and Duron. */ + cpuinfo_uarch_k7 = 0x00200102, + /** AMD Athlon 64, Opteron 64. */ + cpuinfo_uarch_k8 = 0x00200103, + /** AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ + cpuinfo_uarch_k10 = 0x00200104, + /** + * AMD Bulldozer microarchitecture + * Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. + */ + cpuinfo_uarch_bulldozer = 0x00200105, + /** + * AMD Piledriver microarchitecture + * Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu + * Dhabi Opteron CPUs. + */ + cpuinfo_uarch_piledriver = 0x00200106, + /** AMD Steamroller microarchitecture (Kaveri APUs). */ + cpuinfo_uarch_steamroller = 0x00200107, + /** AMD Excavator microarchitecture (Carizzo APUs). */ + cpuinfo_uarch_excavator = 0x00200108, + /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen = 0x00200109, + /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen2 = 0x0020010A, + /** AMD Zen 3 microarchitecture. */ + cpuinfo_uarch_zen3 = 0x0020010B, + /** AMD Zen 4 microarchitecture. */ + cpuinfo_uarch_zen4 = 0x0020010C, + + /** NSC Geode and AMD Geode GX and LX. */ + cpuinfo_uarch_geode = 0x00200200, + /** AMD Bobcat mobile microarchitecture. */ + cpuinfo_uarch_bobcat = 0x00200201, + /** AMD Jaguar mobile microarchitecture. */ + cpuinfo_uarch_jaguar = 0x00200202, + /** AMD Puma mobile microarchitecture. */ + cpuinfo_uarch_puma = 0x00200203, + + /** ARM7 series. */ + cpuinfo_uarch_arm7 = 0x00300100, + /** ARM9 series. 
*/ + cpuinfo_uarch_arm9 = 0x00300101, + /** ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ + cpuinfo_uarch_arm11 = 0x00300102, + + /** ARM Cortex-A5. */ + cpuinfo_uarch_cortex_a5 = 0x00300205, + /** ARM Cortex-A7. */ + cpuinfo_uarch_cortex_a7 = 0x00300207, + /** ARM Cortex-A8. */ + cpuinfo_uarch_cortex_a8 = 0x00300208, + /** ARM Cortex-A9. */ + cpuinfo_uarch_cortex_a9 = 0x00300209, + /** ARM Cortex-A12. */ + cpuinfo_uarch_cortex_a12 = 0x00300212, + /** ARM Cortex-A15. */ + cpuinfo_uarch_cortex_a15 = 0x00300215, + /** ARM Cortex-A17. */ + cpuinfo_uarch_cortex_a17 = 0x00300217, + + /** ARM Cortex-A32. */ + cpuinfo_uarch_cortex_a32 = 0x00300332, + /** ARM Cortex-A35. */ + cpuinfo_uarch_cortex_a35 = 0x00300335, + /** ARM Cortex-A53. */ + cpuinfo_uarch_cortex_a53 = 0x00300353, + /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities + compared to revision 1+). */ + cpuinfo_uarch_cortex_a55r0 = 0x00300354, + /** ARM Cortex-A55. */ + cpuinfo_uarch_cortex_a55 = 0x00300355, + /** ARM Cortex-A57. */ + cpuinfo_uarch_cortex_a57 = 0x00300357, + /** ARM Cortex-A65. */ + cpuinfo_uarch_cortex_a65 = 0x00300365, + /** ARM Cortex-A72. */ + cpuinfo_uarch_cortex_a72 = 0x00300372, + /** ARM Cortex-A73. */ + cpuinfo_uarch_cortex_a73 = 0x00300373, + /** ARM Cortex-A75. */ + cpuinfo_uarch_cortex_a75 = 0x00300375, + /** ARM Cortex-A76. */ + cpuinfo_uarch_cortex_a76 = 0x00300376, + /** ARM Cortex-A77. */ + cpuinfo_uarch_cortex_a77 = 0x00300377, + /** ARM Cortex-A78. */ + cpuinfo_uarch_cortex_a78 = 0x00300378, + + /** ARM Neoverse N1. */ + cpuinfo_uarch_neoverse_n1 = 0x00300400, + /** ARM Neoverse E1. */ + cpuinfo_uarch_neoverse_e1 = 0x00300401, + /** ARM Neoverse V1. */ + cpuinfo_uarch_neoverse_v1 = 0x00300402, + /** ARM Neoverse N2. */ + cpuinfo_uarch_neoverse_n2 = 0x00300403, + /** ARM Neoverse V2. */ + cpuinfo_uarch_neoverse_v2 = 0x00300404, + + /** ARM Cortex-X1. */ + cpuinfo_uarch_cortex_x1 = 0x00300501, + /** ARM Cortex-X2. 
*/ + cpuinfo_uarch_cortex_x2 = 0x00300502, + /** ARM Cortex-X3. */ + cpuinfo_uarch_cortex_x3 = 0x00300503, + /** ARM Cortex-X4. */ + cpuinfo_uarch_cortex_x4 = 0x00300504, + + /** ARM Cortex-A510. */ + cpuinfo_uarch_cortex_a510 = 0x00300551, + /** ARM Cortex-A520. */ + cpuinfo_uarch_cortex_a520 = 0x00300552, + /** ARM Cortex-A710. */ + cpuinfo_uarch_cortex_a710 = 0x00300571, + /** ARM Cortex-A715. */ + cpuinfo_uarch_cortex_a715 = 0x00300572, + /** ARM Cortex-A720. */ + cpuinfo_uarch_cortex_a720 = 0x00300573, + + /** Qualcomm Scorpion. */ + cpuinfo_uarch_scorpion = 0x00400100, + /** Qualcomm Krait. */ + cpuinfo_uarch_krait = 0x00400101, + /** Qualcomm Kryo. */ + cpuinfo_uarch_kryo = 0x00400102, + /** Qualcomm Falkor. */ + cpuinfo_uarch_falkor = 0x00400103, + /** Qualcomm Saphira. */ + cpuinfo_uarch_saphira = 0x00400104, + + /** Nvidia Denver. */ + cpuinfo_uarch_denver = 0x00500100, + /** Nvidia Denver 2. */ + cpuinfo_uarch_denver2 = 0x00500101, + /** Nvidia Carmel. */ + cpuinfo_uarch_carmel = 0x00500102, + + /** Samsung Exynos M1 (Exynos 8890 big cores). */ + cpuinfo_uarch_exynos_m1 = 0x00600100, + /** Samsung Exynos M2 (Exynos 8895 big cores). */ + cpuinfo_uarch_exynos_m2 = 0x00600101, + /** Samsung Exynos M3 (Exynos 9810 big cores). */ + cpuinfo_uarch_exynos_m3 = 0x00600102, + /** Samsung Exynos M4 (Exynos 9820 big cores). */ + cpuinfo_uarch_exynos_m4 = 0x00600103, + /** Samsung Exynos M5 (Exynos 9830 big cores). */ + cpuinfo_uarch_exynos_m5 = 0x00600104, + + /* Deprecated synonym for Cortex-A76 */ + cpuinfo_uarch_cortex_a76ae = 0x00300376, + /* Deprecated names for Exynos. */ + cpuinfo_uarch_mongoose_m1 = 0x00600100, + cpuinfo_uarch_mongoose_m2 = 0x00600101, + cpuinfo_uarch_meerkat_m3 = 0x00600102, + cpuinfo_uarch_meerkat_m4 = 0x00600103, + + /** Apple A6 and A6X processors. */ + cpuinfo_uarch_swift = 0x00700100, + /** Apple A7 processor. */ + cpuinfo_uarch_cyclone = 0x00700101, + /** Apple A8 and A8X processor. 
*/ + cpuinfo_uarch_typhoon = 0x00700102, + /** Apple A9 and A9X processor. */ + cpuinfo_uarch_twister = 0x00700103, + /** Apple A10 and A10X processor. */ + cpuinfo_uarch_hurricane = 0x00700104, + /** Apple A11 processor (big cores). */ + cpuinfo_uarch_monsoon = 0x00700105, + /** Apple A11 processor (little cores). */ + cpuinfo_uarch_mistral = 0x00700106, + /** Apple A12 processor (big cores). */ + cpuinfo_uarch_vortex = 0x00700107, + /** Apple A12 processor (little cores). */ + cpuinfo_uarch_tempest = 0x00700108, + /** Apple A13 processor (big cores). */ + cpuinfo_uarch_lightning = 0x00700109, + /** Apple A13 processor (little cores). */ + cpuinfo_uarch_thunder = 0x0070010A, + /** Apple A14 / M1 processor (big cores). */ + cpuinfo_uarch_firestorm = 0x0070010B, + /** Apple A14 / M1 processor (little cores). */ + cpuinfo_uarch_icestorm = 0x0070010C, + /** Apple A15 / M2 processor (big cores). */ + cpuinfo_uarch_avalanche = 0x0070010D, + /** Apple A15 / M2 processor (little cores). */ + cpuinfo_uarch_blizzard = 0x0070010E, + + /** Cavium ThunderX. */ + cpuinfo_uarch_thunderx = 0x00800100, + /** Cavium ThunderX2 (originally Broadcom Vulkan). */ + cpuinfo_uarch_thunderx2 = 0x00800200, + + /** Marvell PJ4. */ + cpuinfo_uarch_pj4 = 0x00900100, + + /** Broadcom Brahma B15. */ + cpuinfo_uarch_brahma_b15 = 0x00A00100, + /** Broadcom Brahma B53. */ + cpuinfo_uarch_brahma_b53 = 0x00A00101, + + /** Applied Micro X-Gene. */ + cpuinfo_uarch_xgene = 0x00B00100, + + /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ + cpuinfo_uarch_dhyana = 0x01000100, + + /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). 
*/ + cpuinfo_uarch_taishan_v110 = 0x00C00100, +}; + +struct cpuinfo_processor { + /** SMT (hyperthread) ID within a core */ + uint32_t smt_id; + /** Core containing this logical processor */ + const struct cpuinfo_core *core; + /** Cluster of cores containing this logical processor */ + const struct cpuinfo_cluster *cluster; + /** Physical package containing this logical processor */ + const struct cpuinfo_package *package; +#if defined(__linux__) + /** + * Linux-specific ID for the logical processor: + * - Linux kernel exposes information about this logical processor in + * /sys/devices/system/cpu/cpu/ + * - Bit in the cpu_set_t identifies this logical processor + */ + int linux_id; +#endif +#if defined(_WIN32) || defined(__CYGWIN__) + /** Windows-specific ID for the group containing the logical processor. + */ + uint16_t windows_group_id; + /** + * Windows-specific ID of the logical processor within its group: + * - Bit in the KAFFINITY mask identifies this + * logical processor within its group. + */ + uint16_t windows_processor_id; +#endif +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** APIC ID (unique x86-specific ID of the logical processor) */ + uint32_t apic_id; +#endif + struct { + /** Level 1 instruction cache */ + const struct cpuinfo_cache *l1i; + /** Level 1 data cache */ + const struct cpuinfo_cache *l1d; + /** Level 2 unified or data cache */ + const struct cpuinfo_cache *l2; + /** Level 3 unified or data cache */ + const struct cpuinfo_cache *l3; + /** Level 4 unified or data cache */ + const struct cpuinfo_cache *l4; + } cache; +}; + +struct cpuinfo_core { + /** Index of the first logical processor on this core. */ + uint32_t processor_start; + /** Number of logical processors on this core */ + uint32_t processor_count; + /** Core ID within a package */ + uint32_t core_id; + /** Cluster containing this core */ + const struct cpuinfo_cluster *cluster; + /** Physical package containing this core. 
*/ + const struct cpuinfo_package *package; + /** Vendor of the CPU microarchitecture for this core */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture for this core */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for this core */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for this core */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the core, in Hz */ + uint64_t frequency; +}; + +struct cpuinfo_cluster { + /** Index of the first logical processor in the cluster */ + uint32_t processor_start; + /** Number of logical processors in the cluster */ + uint32_t processor_count; + /** Index of the first core in the cluster */ + uint32_t core_start; + /** Number of cores on the cluster */ + uint32_t core_count; + /** Cluster ID within a package */ + uint32_t cluster_id; + /** Physical package containing the cluster */ + const struct cpuinfo_package *package; + /** CPU microarchitecture vendor of the cores in the cluster */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture of the cores in the cluster */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register of the cores in the cluster */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) of the cores in the cluster */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ + uint64_t frequency; +}; + +#define CPUINFO_PACKAGE_NAME_MAX 48 + +struct cpuinfo_package { + /** SoC or processor chip model name */ + char name[CPUINFO_PACKAGE_NAME_MAX]; + /** Index of the first logical processor on this physical package */ + uint32_t processor_start; + /** Number of logical processors on this physical package */ + uint32_t processor_count; + /** Index of the first core on this physical package */ + uint32_t core_start; + /** Number of 
cores on this physical package */ + uint32_t core_count; + /** Index of the first cluster of cores on this physical package */ + uint32_t cluster_start; + /** Number of clusters of cores on this physical package */ + uint32_t cluster_count; +}; + +struct cpuinfo_uarch_info { + /** Type of CPU microarchitecture */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for the microarchitecture */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for the microarchitecture */ + uint32_t midr; +#endif + /** Number of logical processors with the microarchitecture */ + uint32_t processor_count; + /** Number of cores with the microarchitecture */ + uint32_t core_count; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +bool CPUINFO_ABI cpuinfo_initialize(void); + +void CPUINFO_ABI cpuinfo_deinitialize(void); + +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +/* This structure is not a part of stable API. Use cpuinfo_has_x86_* functions + * instead. 
*/ +struct cpuinfo_x86_isa { +#if CPUINFO_ARCH_X86 + bool rdtsc; +#endif + bool rdtscp; + bool rdpid; + bool sysenter; +#if CPUINFO_ARCH_X86 + bool syscall; +#endif + bool msr; + bool clzero; + bool clflush; + bool clflushopt; + bool mwait; + bool mwaitx; +#if CPUINFO_ARCH_X86 + bool emmx; +#endif + bool fxsave; + bool xsave; +#if CPUINFO_ARCH_X86 + bool fpu; + bool mmx; + bool mmx_plus; +#endif + bool three_d_now; + bool three_d_now_plus; +#if CPUINFO_ARCH_X86 + bool three_d_now_geode; +#endif + bool prefetch; + bool prefetchw; + bool prefetchwt1; +#if CPUINFO_ARCH_X86 + bool daz; + bool sse; + bool sse2; +#endif + bool sse3; + bool ssse3; + bool sse4_1; + bool sse4_2; + bool sse4a; + bool misaligned_sse; + bool avx; + bool avxvnni; + bool fma3; + bool fma4; + bool xop; + bool f16c; + bool avx2; + bool avx512f; + bool avx512pf; + bool avx512er; + bool avx512cd; + bool avx512dq; + bool avx512bw; + bool avx512vl; + bool avx512ifma; + bool avx512vbmi; + bool avx512vbmi2; + bool avx512bitalg; + bool avx512vpopcntdq; + bool avx512vnni; + bool avx512bf16; + bool avx512fp16; + bool avx512vp2intersect; + bool avx512_4vnniw; + bool avx512_4fmaps; + bool amx_bf16; + bool amx_tile; + bool amx_int8; + bool amx_fp16; + bool avx_vnni_int8; + bool avx_vnni_int16; + bool avx_ne_convert; + bool hle; + bool rtm; + bool xtest; + bool mpx; +#if CPUINFO_ARCH_X86 + bool cmov; + bool cmpxchg8b; +#endif + bool cmpxchg16b; + bool clwb; + bool movbe; +#if CPUINFO_ARCH_X86_64 + bool lahf_sahf; +#endif + bool fs_gs_base; + bool lzcnt; + bool popcnt; + bool tbm; + bool bmi; + bool bmi2; + bool adx; + bool aes; + bool vaes; + bool pclmulqdq; + bool vpclmulqdq; + bool gfni; + bool rdrand; + bool rdseed; + bool sha; + bool rng; + bool ace; + bool ace2; + bool phe; + bool pmm; + bool lwp; +}; + +extern struct cpuinfo_x86_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_x86_rdtsc(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return 
true; +#else + return cpuinfo_isa.rdtsc; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdtscp(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdtscp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdpid(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdpid; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clzero(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clzero; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwait(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwait; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mwaitx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwaitx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fxsave(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fxsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xsave(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xsave; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fpu(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.fpu; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mmx_plus(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.mmx_plus; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow(void) { +#if CPUINFO_ARCH_X86 || 
CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_plus(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now_plus; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_3dnow_geode(void) { +#if CPUINFO_ARCH_X86_64 + return false; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return false; +#else + return cpuinfo_isa.three_d_now_geode; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetch(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetch; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_prefetchwt1(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchwt1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_daz(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.daz; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse2(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse2; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_ssse3(void) { +#if CPUINFO_ARCH_X86 
|| CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.ssse3; +#endif +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_1(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_1; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4_2(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.sse4_2; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sse4a(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sse4a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_misaligned_sse(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.misaligned_sse; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avxvnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avxvnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma3(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_fma4(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma4; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xop(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xop; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_f16c(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.f16c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx2(void) { +#if CPUINFO_ARCH_X86 || 
CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512f(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512pf(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512pf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512er(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512er; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512cd(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512cd; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512dq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512dq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vl(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vl; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512ifma(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512ifma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bitalg(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bitalg; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vpopcntdq(void) { +#if CPUINFO_ARCH_X86 
|| CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vpopcntdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vnni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vnni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vp2intersect; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4vnniw(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4vnniw; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_avx512_4fmaps(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4fmaps; +#else + return false; +#endif +} + +/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection + * + * I. AMX is a new extensions to the x86 ISA to work on matrices, consists of + * 1) 2-dimentional registers (tiles), hold sub-matrices from larger matrices + * in memory 2) Accelerator called Tile Matrix Multiply (TMUL), contains + * instructions operating on tiles + * + * II. 
Platforms that supports AMX: + * +-----------------+-----+----------+----------+----------+----------+ + * | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 | + * +-----------------+-----+----------+----------+----------+----------+ + * | Sapphire Rapids | 4th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Emerald Rapids | 5th | YES | YES | YES | NO | + * +-----------------+-----+----------+----------+----------+----------+ + * | Granite Rapids | 6th | YES | YES | YES | YES | + * +-----------------+-----+----------+----------+----------+----------+ + * + * Reference: https://www.intel.com/content/www/us/en/products/docs + * /accelerator-engines/advanced-matrix-extensions/overview.html + */ +static inline bool cpuinfo_has_x86_amx_bf16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_tile(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_tile; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_int8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_amx_fp16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.amx_fp16; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT8 + * Supported Platfroms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int8(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int8; +#else + return false; +#endif +} + +/* + * Intel AVX Vector Neural Network Instructions (VNNI) INT16 + * Supported Platfroms: Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_vnni_int16(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_vnni_int16; 
+#else + return false; +#endif +} + +/* + * A new set of instructions, which can convert low precision floating point + * like BF16/FP16 to high precision floating point FP32, as well as convert FP32 + * elements to BF16. This instruction allows the platform to have improved AI + * capabilities and better compatibility. + * + * Supported Platforms: Sierra Forest, Arrow Lake, Lunar Lake + */ +static inline bool cpuinfo_has_x86_avx_ne_convert(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx_ne_convert; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_hle(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.hle; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rtm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rtm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_xtest(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xtest; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_mpx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mpx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmov(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmov; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg8b(void) { +#if CPUINFO_ARCH_X86_64 + return true; +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmpxchg8b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_cmpxchg16b(void) { +#if CPUINFO_ARCH_X86_64 + return cpuinfo_isa.cmpxchg16b; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_clwb(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clwb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_movbe(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.movbe; +#else + 
return false; +#endif +} + +static inline bool cpuinfo_has_x86_lahf_sahf(void) { +#if CPUINFO_ARCH_X86 + return true; +#elif CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lahf_sahf; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_lzcnt(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lzcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_popcnt(void) { +#if CPUINFO_ARCH_X86_64 +#if defined(__ANDROID__) + return true; +#else + return cpuinfo_isa.popcnt; +#endif +#elif CPUINFO_ARCH_X86 + return cpuinfo_isa.popcnt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_tbm(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.tbm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_bmi2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_adx(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.adx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_aes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.aes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vaes(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vaes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_pclmulqdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.pclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_vpclmulqdq(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vpclmulqdq; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_gfni(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return 
cpuinfo_isa.gfni; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdrand(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdrand; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_rdseed(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdseed; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_x86_sha(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sha; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +/* This structure is not a part of stable API. Use cpuinfo_has_arm_* functions + * instead. */ +struct cpuinfo_arm_isa { +#if CPUINFO_ARCH_ARM + bool thumb; + bool thumb2; + bool thumbee; + bool jazelle; + bool armv5e; + bool armv6; + bool armv6k; + bool armv7; + bool armv7mp; + bool armv8; + bool idiv; + + bool vfpv2; + bool vfpv3; + bool d32; + bool fp16; + bool fma; + + bool wmmx; + bool wmmx2; + bool neon; +#endif +#if CPUINFO_ARCH_ARM64 + bool atomics; + bool bf16; + bool sve; + bool sve2; + bool i8mm; + bool sme; + bool sme2; + bool sme2p1; + bool sme_i16i32; + bool sme_bi32i32; + bool sme_b16b16; + bool sme_f16f16; + uint32_t svelen; +#endif + bool rdm; + bool fp16arith; + bool dot; + bool jscvt; + bool fcma; + bool fhm; + + bool aes; + bool sha1; + bool sha2; + bool pmull; + bool crc32; +}; + +extern struct cpuinfo_arm_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_arm_thumb(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_thumb2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v5e(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv5e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v6(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6; +#else + return false; +#endif +} + +static inline 
bool cpuinfo_has_arm_v6k(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6k; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v7mp(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7mp; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_idiv(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.idiv; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16 && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_vfpv4_d32(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + 
return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma && cpuinfo_isa.d32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fp16_arith(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_wmmx2(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fma(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_v8(void) { +#if CPUINFO_ARCH_ARM64 + return true; +#elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.armv8; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_atomics(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.atomics; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_rdm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.rdm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_fp16_arith(void) { +#if CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16arith; +#elif CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; +#else + return false; +#endif +} + +static inline bool 
cpuinfo_has_arm_fhm(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fhm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_dot(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.dot; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_neon_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_jscvt(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.jscvt; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_fcma(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fcma; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_i8mm(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.i8mm; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_aes(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.aes; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha1(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sha2(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_pmull(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.pmull; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_crc32(void) { +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.crc32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sve_bf16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve && cpuinfo_isa.bf16; +#else + return false; +#endif +} + +static inline bool 
cpuinfo_has_arm_sve2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve2; +#else + return false; +#endif +} + +// Function to get the max SVE vector length on ARM CPU's which support SVE. +static inline uint32_t cpuinfo_get_max_arm_sve_length(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.svelen * 8; // bytes * 8 = bit length(vector length) +#else + return 0; +#endif +} + +static inline bool cpuinfo_has_arm_sme(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme2p1(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2p1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_i16i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_i16i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_bi32i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_bi32i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_b16b16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_b16b16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_f16f16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_f16f16; +#else + return false; +#endif +} + +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 +/* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions + * instead. */ +struct cpuinfo_riscv_isa { + /** + * Keep fields in line with the canonical order as defined by + * Section 27.11 Subset Naming Convention. + */ + /* RV32I/64I/128I Base ISA. */ + bool i; +#if CPUINFO_ARCH_RISCV32 + /* RV32E Base ISA. */ + bool e; +#endif + /* Integer Multiply/Divide Extension. */ + bool m; + /* Atomic Extension. */ + bool a; + /* Single-Precision Floating-Point Extension. */ + bool f; + /* Double-Precision Floating-Point Extension. 
*/ + bool d; + /* Compressed Extension. */ + bool c; + /* Vector Extension. */ + bool v; +}; + +extern struct cpuinfo_riscv_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_riscv_i(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.i; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_e(void) { +#if CPUINFO_ARCH_RISCV32 + return cpuinfo_isa.e; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_m(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.m; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_a(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.a; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_f(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.f; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_d(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.d; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_g(void) { + // The 'G' extension is simply shorthand for 'IMAFD'. 
+ return cpuinfo_has_riscv_i() && cpuinfo_has_riscv_m() && + cpuinfo_has_riscv_a() && cpuinfo_has_riscv_f() && + cpuinfo_has_riscv_d(); +} + +static inline bool cpuinfo_has_riscv_c(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.c; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_riscv_v(void) { +#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 + return cpuinfo_isa.v; +#else + return false; +#endif +} + +const struct cpuinfo_processor *CPUINFO_ABI cpuinfo_get_processors(void); +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_cores(void); +const struct cpuinfo_cluster *CPUINFO_ABI cpuinfo_get_clusters(void); +const struct cpuinfo_package *CPUINFO_ABI cpuinfo_get_packages(void); +const struct cpuinfo_uarch_info *CPUINFO_ABI cpuinfo_get_uarchs(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1i_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1d_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l2_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l3_caches(void); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l4_caches(void); + +const struct cpuinfo_processor *CPUINFO_ABI +cpuinfo_get_processor(uint32_t index); +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_core(uint32_t index); +const struct cpuinfo_cluster *CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); +const struct cpuinfo_package *CPUINFO_ABI cpuinfo_get_package(uint32_t index); +const struct cpuinfo_uarch_info *CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index); +const struct cpuinfo_cache *CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index); + +uint32_t CPUINFO_ABI 
cpuinfo_get_processors_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); + +/** + * Returns upper bound on cache size. + */ +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); + +/** + * Identify the logical processor that executes the current thread. + * + * There is no guarantee that the thread will stay on the same logical processor + * for any time. Callers should treat the result as only a hint, and be prepared + * to handle NULL return value. + */ +const struct cpuinfo_processor *CPUINFO_ABI cpuinfo_get_current_processor(void); + +/** + * Identify the core that executes the current thread. + * + * There is no guarantee that the thread will stay on the same core for any + * time. Callers should treat the result as only a hint, and be prepared to + * handle NULL return value. + */ +const struct cpuinfo_core *CPUINFO_ABI cpuinfo_get_current_core(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns 0. + * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); + +/** + * Identify the microarchitecture index of the core that executes the current + * thread. If the system does not support such identification, the function + * returns the user-specified default value. 
+ * + * There is no guarantee that the thread will stay on the same type of core for + * any time. Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI +cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* CPUINFO_H */ diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h new file mode 100644 index 0000000000..c00cc30a31 --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once +#if defined(__ANDROID__) && defined(__aarch64__) + +#include + +namespace executorch::extension::cpuinfo { + +uint32_t get_num_performant_cores(); + +} // namespace executorch::extension::cpuinfo + +namespace torch::executorch::cpuinfo { // DEPRECATED +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. Note that threadpool incorrectly used +// the namespace `torch::executorch` instead of `torch::executor`. 
+using ::executorch::extension::cpuinfo::get_num_performant_cores; // DEPRECATED +} // namespace torch::executorch::cpuinfo +#endif diff --git a/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h new file mode 100644 index 0000000000..f0dcc20a8c --- /dev/null +++ b/packages/react-native-executorch/third-party/include/executorch/extension/threadpool/threadpool.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once +#if defined(__ANDROID__) && defined(__aarch64__) + +#include +#include +#include + +#include + +namespace executorch::extension::threadpool { + +class ThreadPool final { +public: + explicit ThreadPool(size_t thread_count = 0); + ~ThreadPool() = default; + + // Make threadpool non copyable + // Non-copyable: threadpool cannot be copied because it will + // effectively require cloning of threadpool. + // Cloning can be done by just calling create_thread_pool. + ThreadPool(const ThreadPool &) = delete; + ThreadPool &operator=(const ThreadPool &) = delete; + + // Make threadpool non-movable. + ThreadPool(ThreadPool &&) = delete; + ThreadPool &operator=(ThreadPool &&) = delete; + + size_t get_thread_count() const; + + /** + * INTERNAL: Resets the threadpool by creating a new threadpool with requested + * # of threads. This is not a thread safe call. When calling this method, + * threads of the threadpool might be doing some work. Some other code may + * also be holding on to the threadpool pointer, that is no longer valid. 
This + * is a private API, which will later be replaced by something that allows + * creating of threadpool with requested size and use such a threadpool with + * backend delegates, custom ops or optimized lib. + */ + [[deprecated("This API is experimental and may change without notice.")]] + bool _unsafe_reset_threadpool(uint32_t num_threads); + + /** + * Run, in parallel, function fn(task_id) over task_id in range [0, range). + * This function is blocking. All input is processed by the time it returns. + * NoThreadPoolGuard (see threadpool_guard.h) can used to disable use of + * multiple threads with the scope of the guard When NoThreadPoolGuard is not + * used all calls to run method are serialized. + */ + void run(const std::function<void(size_t)> &fn, size_t range); + +private: + friend pthreadpool_t get_pthreadpool(); + +private: + // This mutex is used inside get_thread_count API but it is not really needed + // since data members of ThreadPool objects are not really mutable. + // TODO(kimishpatel): Figure out if we will allow set_num_threads API, in + // which case this mutex will be useful. Otherwise remove it. + mutable std::mutex mutex_; + std::unique_ptr<pthreadpool, decltype(&pthreadpool_destroy)> threadpool_; +}; + +/** + * Returns the singleton instance of ThreadPool for ATen/TH multithreading. + */ +ThreadPool *get_threadpool(); + +/** + * Returns the underlying pthreadpool instance used by the implementation of + * ThreadPool returned by `get_threadpool()`. Only for use in external libraries + * so as to unify threading across internal (i.e. ATen, etc.) and external (e.g. + * NNPACK, QNNPACK, XNNPACK) use cases. + */ +pthreadpool_t get_pthreadpool(); + +} // namespace executorch::extension::threadpool + +namespace torch::executorch::threadpool { // DEPRECATED +// TODO(T197294990): Remove these deprecated aliases once all users have moved +// to the new `::executorch` namespaces. Note that threadpool incorrectly used +// the namespace `torch::executorch` instead of `torch::executor`. 
+using ::executorch::extension::threadpool::get_pthreadpool; // DEPRECATED +using ::executorch::extension::threadpool::get_threadpool; // DEPRECATED +using ::executorch::extension::threadpool::ThreadPool; // DEPRECATED +} // namespace torch::executorch::threadpool +#endif \ No newline at end of file diff --git a/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h b/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h new file mode 100644 index 0000000000..42d37657fc --- /dev/null +++ b/packages/react-native-executorch/third-party/include/pthreadpool/pthreadpool.h @@ -0,0 +1,2236 @@ +#ifndef PTHREADPOOL_H_ +#define PTHREADPOOL_H_ + +#include <stddef.h> +#include <stdint.h> + +typedef struct pthreadpool *pthreadpool_t; + +typedef void (*pthreadpool_task_1d_t)(void *, size_t); +typedef void (*pthreadpool_task_1d_with_thread_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_1d_tile_1d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_2d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_task_2d_with_thread_t)(void *, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_2d_tile_1d_t)(void *, size_t, size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_3d_t)(void *, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_thread_t)(void *, size_t, + size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_4d_t)(void *, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_4d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_t)(void *, size_t, 
size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_5d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_5d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_t)(void *, size_t, size_t, size_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_1d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_6d_tile_2d_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t, + size_t); + +typedef void (*pthreadpool_task_1d_with_id_t)(void *, uint32_t, size_t); +typedef void (*pthreadpool_task_2d_tile_1d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t); +typedef void (*pthreadpool_task_2d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_task_4d_tile_2d_with_id_t)(void *, uint32_t, size_t, + size_t, size_t, size_t, + size_t, size_t); + +typedef void (*pthreadpool_task_2d_tile_1d_with_id_with_thread_t)( + void *, uint32_t, size_t, size_t, size_t, size_t); +typedef void (*pthreadpool_task_3d_tile_1d_with_id_with_thread_t)( + void *, uint32_t, size_t, size_t, size_t, size_t, size_t); + +/** + * Disable support for denormalized numbers to the maximum extent possible for + * the duration of the computation. + * + * Handling denormalized floating-point numbers is often implemented in + * microcode, and incurs significant performance degradation. 
This hint + * instructs the thread pool to disable support for denormalized numbers before + * running the computation by manipulating architecture-specific control + * registers, and restore the initial value of control registers after the + * computation is complete. The thread pool temporary disables denormalized + * numbers on all threads involved in the computation (i.e. the caller threads, + * and potentially worker threads). + * + * Disabling denormalized numbers may have a small negative effect on results' + * accuracy. As various architectures differ in capabilities to control + * processing of denormalized numbers, using this flag may also hurt results' + * reproducibility across different instruction set architectures. + */ +#define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001 + +/** + * Yield worker threads to the system scheduler after the operation is finished. + * + * Force workers to use kernel wait (instead of active spin-wait by default) for + * new commands after this command is processed. This flag affects only the + * immediate next operation on this thread pool. To make the thread pool always + * use kernel wait, pass this flag to all parallelization functions. + */ +#define PTHREADPOOL_FLAG_YIELD_WORKERS 0x00000002 + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Create a thread pool with the specified number of threads. + * + * @param threads_count the number of threads in the thread pool. + * A value of 0 has special interpretation: it creates a thread pool with as + * many threads as there are logical processors in the system. + * + * @returns A pointer to an opaque thread pool object if the call is + * successful, or NULL pointer if the call failed. + */ +pthreadpool_t pthreadpool_create(size_t threads_count); + +/** + * Query the number of threads in a thread pool. + * + * @param threadpool the thread pool to query. + * + * @returns The number of threads in the thread pool. 
+ */ +size_t pthreadpool_get_threads_count(pthreadpool_t threadpool); + +/** + * Process items on a 1D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d(pthreadpool_t threadpool, + pthreadpool_task_1d_t function, void *context, + size_t range, uint32_t flags); + +/** + * Process items on a 1D grid passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * function(context, thread_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. 
The + * specified function will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_thread( + pthreadpool_t threadpool, pthreadpool_task_1d_with_thread_t function, + void *context, size_t range, uint32_t flags); + +/** + * Process items on a 1D grid using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range; i++) + * function(context, uarch_index, i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range the number of items on the 1D grid to process. + * The specified function will be called once for each item. 
+ * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range, uint32_t flags); + +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * function(context, i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_1d_tile_1d_t function, + void *context, size_t range, + size_t tile, uint32_t flags); + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. 
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d(pthreadpool_t threadpool, + pthreadpool_task_2d_t function, void *context, + size_t range_i, size_t range_j, uint32_t flags); + +/** + * Process items on a 2D grid passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * function(context, thread_index, i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each item. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_with_thread( + pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function, + void *context, size_t range_i, size_t range_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t tile_j, + uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. 
+ * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, thread_index, i, j, min(range_j - j, + * tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_2d_tile_1d_with_id_with_thread_t function, void *context, + uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, + size_t range_j, size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. 
+ * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_i the maximum number of items along the first dimension of + * the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_2d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j, uint32_t flags); + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * function(context, uarch_index, i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. 
+ * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, + * cpuinfo initialization failed, or index returned + * by cpuinfo_get_current_uarch_index() exceeds + * the max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected + * by the specified function. If the index returned + * by cpuinfo_get_current_uarch_index() exceeds this + * value, default_uarch_index will be used instead. + * default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 2D grid. + * @param range_j the number of items to process along the second + * dimension of the 2D grid. + * @param tile_i the maximum number of items along the first + * dimension of the 2D grid to process in one function call. + * @param tile_j the maximum number of items along the second + * dimension of the 2D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_2d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, + uint32_t flags); + +/** + * Process items on a 3D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * function(context, i, j, k); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d(pthreadpool_t threadpool, + pthreadpool_task_3d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. 
+ * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_k, uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension and passing along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, thread_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_thread_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension using a microarchitecture-aware task function and passing + * along the current thread id. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, thread_index, i, j, k, min(range_k - + * k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. 
+ * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread( + pthreadpool_t threadpool, + pthreadpool_task_3d_tile_1d_with_id_with_thread_t function, void *context, + uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, + size_t range_j, size_t range_k, size_t tile_k, uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one function call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_3d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_j, size_t tile_k, + uint32_t flags); + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. 
+ * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * function(context, uarch_index, i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 3D grid. + * @param range_j the number of items to process along the second + * dimension of the 3D grid. + * @param range_k the number of items to process along the third + * dimension of the 3D grid. + * @param tile_j the maximum number of items along the second + * dimension of the 3D grid to process in one function call. 
+ * @param tile_k the maximum number of items along the third + * dimension of the 3D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_3d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t tile_j, + size_t tile_k, uint32_t flags); + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * function(context, i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d(pthreadpool_t threadpool, + pthreadpool_task_4d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_4d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_l, + uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_4d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_k, + size_t tile_l, uint32_t flags); + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions using a microarchitecture-aware task function. + * + * The function implements a parallel version of the following snippet: + * + * uint32_t uarch_index = cpuinfo_initialize() ? + * cpuinfo_get_current_uarch_index() : default_uarch_index; + * if (uarch_index > max_uarch_index) uarch_index = default_uarch_index; + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * function(context, uarch_index, i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If + * threadpool is NULL, all items are processed serially on the calling + * thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified + * function. + * @param default_uarch_index the microarchitecture index to use when + * pthreadpool is configured without cpuinfo, cpuinfo initialization failed, + * or index returned by cpuinfo_get_current_uarch_index() exceeds the + * max_uarch_index value. + * @param max_uarch_index the maximum microarchitecture index expected by + * the specified function. 
If the index returned by + * cpuinfo_get_current_uarch_index() exceeds this value, default_uarch_index + * will be used instead. default_uarch_index can exceed max_uarch_index. + * @param range_i the number of items to process along the first + * dimension of the 4D grid. + * @param range_j the number of items to process along the second + * dimension of the 4D grid. + * @param range_k the number of items to process along the third + * dimension of the 4D grid. + * @param range_l the number of items to process along the fourth + * dimension of the 4D grid. + * @param tile_k the maximum number of items along the third + * dimension of the 4D grid to process in one function call. + * @param tile_l the maximum number of items along the fourth + * dimension of the 4D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional + * flags (PTHREADPOOL_FLAG_DISABLE_DENORMALS or + * PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_4d_tile_2d_with_uarch( + pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_with_id_t function, + void *context, uint32_t default_uarch_index, uint32_t max_uarch_index, + size_t range_i, size_t range_j, size_t range_k, size_t range_l, + size_t tile_k, size_t tile_l, uint32_t flags); + +/** + * Process items on a 5D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * function(context, i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d(pthreadpool_t threadpool, + pthreadpool_task_5d_t function, void *context, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, size_t range_m, uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. 
+ * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_5d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_m, uint32_t flags); + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * function(context, i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. 
+ * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one function call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_5d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_l, size_t tile_m, + uint32_t flags); + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * function(context, i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool
+ * is NULL, all items are processed serially on the calling thread.
+ * @param function the function to call for each tile.
+ * @param context the first argument passed to the specified function.
+ * @param range_i the number of items to process along the first dimension
+ * of the 6D grid.
+ * @param range_j the number of items to process along the second dimension
+ * of the 6D grid.
+ * @param range_k the number of items to process along the third dimension
+ * of the 6D grid.
+ * @param range_l the number of items to process along the fourth dimension
+ * of the 6D grid.
+ * @param range_m the number of items to process along the fifth dimension
+ * of the 6D grid.
+ * @param range_n the number of items to process along the sixth dimension
+ * of the 6D grid.
+ * @param flags a bitwise combination of zero or more optional flags
+ * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS)
+ */
+void pthreadpool_parallelize_6d(pthreadpool_t threadpool,
+                                pthreadpool_task_6d_t function, void *context,
+                                size_t range_i, size_t range_j, size_t range_k,
+                                size_t range_l, size_t range_m, size_t range_n,
+                                uint32_t flags);
+
+/**
+ * Process items on a 6D grid with the specified maximum tile size along the
+ * last grid dimension.
+ *
+ * The function implements a parallel version of the following snippet:
+ *
+ *   for (size_t i = 0; i < range_i; i++)
+ *     for (size_t j = 0; j < range_j; j++)
+ *       for (size_t k = 0; k < range_k; k++)
+ *         for (size_t l = 0; l < range_l; l++)
+ *           for (size_t m = 0; m < range_m; m++)
+ *             for (size_t n = 0; n < range_n; n += tile_n)
+ *               function(context, i, j, k, l, m, n, min(range_n - n, tile_n));
+ *
+ * When the function returns, all items have been processed and the thread pool
+ * is ready for a new task.
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool, + pthreadpool_task_6d_tile_1d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_n, + uint32_t flags); + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * function(context, i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param function the function to call for each tile. + * @param context the first argument passed to the specified function. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one function call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one function call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool, + pthreadpool_task_6d_tile_2d_t function, + void *context, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_m, + size_t tile_n, uint32_t flags); + +/** + * Terminates threads in the thread pool and releases associated resources. + * + * @warning Accessing the thread pool after a call to this function constitutes + * undefined behaviour and may cause data corruption. + * + * @param[in,out] threadpool The thread pool to destroy. + */ +void pthreadpool_destroy(pthreadpool_t threadpool); + +#ifndef PTHREADPOOL_NO_DEPRECATED_API + +/* Legacy API for compatibility with pre-existing users (e.g. NNPACK) */ +#if defined(__GNUC__) +#define PTHREADPOOL_DEPRECATED __attribute__((__deprecated__)) +#else +#define PTHREADPOOL_DEPRECATED +#endif + +typedef void (*pthreadpool_function_1d_t)(void *, size_t); +typedef void (*pthreadpool_function_1d_tiled_t)(void *, size_t, size_t); +typedef void (*pthreadpool_function_2d_t)(void *, size_t, size_t); +typedef void (*pthreadpool_function_2d_tiled_t)(void *, size_t, size_t, size_t, + size_t); +typedef void (*pthreadpool_function_3d_tiled_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t); +typedef void (*pthreadpool_function_4d_tiled_t)(void *, size_t, size_t, size_t, + size_t, size_t, size_t, size_t, + size_t); + +void pthreadpool_compute_1d(pthreadpool_t threadpool, + pthreadpool_function_1d_t function, void *argument, + size_t range) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_1d_tiled(pthreadpool_t threadpool, + pthreadpool_function_1d_tiled_t function, + void *argument, size_t range, + size_t tile) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_2d(pthreadpool_t threadpool, + pthreadpool_function_2d_t function, void *argument, + 
size_t range_i, + size_t range_j) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_2d_tiled(pthreadpool_t threadpool, + pthreadpool_function_2d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_3d_tiled(pthreadpool_t threadpool, + pthreadpool_function_3d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t range_k, size_t tile_i, + size_t tile_j, + size_t tile_k) PTHREADPOOL_DEPRECATED; + +void pthreadpool_compute_4d_tiled(pthreadpool_t threadpool, + pthreadpool_function_4d_tiled_t function, + void *argument, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_i, size_t tile_j, + size_t tile_k, + size_t tile_l) PTHREADPOOL_DEPRECATED; + +#endif /* PTHREADPOOL_NO_DEPRECATED_API */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#ifdef __cplusplus + +namespace libpthreadpool { +namespace detail { +namespace { + +template void call_wrapper_1d(void *arg, size_t i) { + (*static_cast(arg))(i); +} + +template +void call_wrapper_1d_tile_1d(void *arg, size_t range_i, size_t tile_i) { + (*static_cast(arg))(range_i, tile_i); +} + +template void call_wrapper_2d(void *functor, size_t i, size_t j) { + (*static_cast(functor))(i, j); +} + +template +void call_wrapper_2d_tile_1d(void *functor, size_t i, size_t range_j, + size_t tile_j) { + (*static_cast(functor))(i, range_j, tile_j); +} + +template +void call_wrapper_2d_tile_2d(void *functor, size_t range_i, size_t range_j, + size_t tile_i, size_t tile_j) { + (*static_cast(functor))(range_i, range_j, tile_i, tile_j); +} + +template +void call_wrapper_3d(void *functor, size_t i, size_t j, size_t k) { + (*static_cast(functor))(i, j, k); +} + +template +void call_wrapper_3d_tile_1d(void *functor, size_t i, size_t j, size_t range_k, + size_t tile_k) { + (*static_cast(functor))(i, j, range_k, tile_k); +} + +template +void call_wrapper_3d_tile_2d(void *functor, size_t i, 
size_t range_j, + size_t range_k, size_t tile_j, size_t tile_k) { + (*static_cast(functor))(i, range_j, range_k, tile_j, tile_k); +} + +template +void call_wrapper_4d(void *functor, size_t i, size_t j, size_t k, size_t l) { + (*static_cast(functor))(i, j, k, l); +} + +template +void call_wrapper_4d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t range_l, size_t tile_l) { + (*static_cast(functor))(i, j, k, range_l, tile_l); +} + +template +void call_wrapper_4d_tile_2d(void *functor, size_t i, size_t j, size_t range_k, + size_t range_l, size_t tile_k, size_t tile_l) { + (*static_cast(functor))(i, j, range_k, range_l, tile_k, tile_l); +} + +template +void call_wrapper_5d(void *functor, size_t i, size_t j, size_t k, size_t l, + size_t m) { + (*static_cast(functor))(i, j, k, l, m); +} + +template +void call_wrapper_5d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t range_m, size_t tile_m) { + (*static_cast(functor))(i, j, k, l, range_m, tile_m); +} + +template +void call_wrapper_5d_tile_2d(void *functor, size_t i, size_t j, size_t k, + size_t range_l, size_t range_m, size_t tile_l, + size_t tile_m) { + (*static_cast(functor))(i, j, k, range_l, range_m, tile_l, tile_m); +} + +template +void call_wrapper_6d(void *functor, size_t i, size_t j, size_t k, size_t l, + size_t m, size_t n) { + (*static_cast(functor))(i, j, k, l, m, n); +} + +template +void call_wrapper_6d_tile_1d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t m, size_t range_n, + size_t tile_n) { + (*static_cast(functor))(i, j, k, l, m, range_n, tile_n); +} + +template +void call_wrapper_6d_tile_2d(void *functor, size_t i, size_t j, size_t k, + size_t l, size_t range_m, size_t range_n, + size_t tile_m, size_t tile_n) { + (*static_cast(functor))(i, j, k, l, range_m, range_n, tile_m, + tile_n); +} + +} /* namespace */ +} /* namespace detail */ +} /* namespace libpthreadpool */ + +/** + * Process items on a 1D grid. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i++) + * functor(i); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each item. + * @param range the number of items on the 1D grid to process. The + * specified functor will be called once for each item. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_1d(pthreadpool_t threadpool, + const T &functor, size_t range, + uint32_t flags = 0) { + pthreadpool_parallelize_1d( + threadpool, &libpthreadpool::detail::call_wrapper_1d, + const_cast(static_cast(&functor)), range, flags); +} + +/** + * Process items on a 1D grid with specified maximum tile size. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range; i += tile) + * functor(i, min(range - i, tile)); + * + * When the call returns, all items have been processed and the thread pool is + * ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, + * the calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range the number of items on the 1D grid to process. + * @param tile the maximum number of items on the 1D grid to process in + * one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range, + size_t tile, + uint32_t flags = 0) { + pthreadpool_parallelize_1d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_1d_tile_1d, + const_cast(static_cast(&functor)), range, tile, + flags); +} + +/** + * Process items on a 2D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * functor(i, j); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each item. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, uint32_t flags = 0) { + pthreadpool_parallelize_2d( + threadpool, &libpthreadpool::detail::call_wrapper_2d, + const_cast(static_cast(&functor)), range_i, range_j, + flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along the + * last grid dimension. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * functor(i, j, min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t tile_j, + uint32_t flags = 0) { + pthreadpool_parallelize_2d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + tile_j, flags); +} + +/** + * Process items on a 2D grid with the specified maximum tile size along each + * grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i += tile_i) + * for (size_t j = 0; j < range_j; j += tile_j) + * functor(i, j, + * min(range_i - i, tile_i), min(range_j - j, tile_j)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. 
+ * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 2D grid. + * @param range_j the number of items to process along the second dimension + * of the 2D grid. + * @param tile_j the maximum number of items along the first dimension of + * the 2D grid to process in one functor call. + * @param tile_j the maximum number of items along the second dimension of + * the 2D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t tile_i, + size_t tile_j, + uint32_t flags = 0) { + pthreadpool_parallelize_2d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_2d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + tile_i, tile_j, flags); +} + +/** + * Process items on a 3D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * functor(i, j, k); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. 
+ * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d( + threadpool, &libpthreadpool::detail::call_wrapper_3d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, flags); +} + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, tile_k, flags); +} + +/** + * Process items on a 3D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j += tile_j) + * for (size_t k = 0; k < range_k; k += tile_k) + * functor(i, j, k, + * min(range_j - j, tile_j), min(range_k - k, tile_k)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 3D grid. + * @param range_j the number of items to process along the second dimension + * of the 3D grid. + * @param range_k the number of items to process along the third dimension + * of the 3D grid. + * @param tile_j the maximum number of items along the second dimension of + * the 3D grid to process in one functor call. + * @param tile_k the maximum number of items along the third dimension of + * the 3D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t tile_j, size_t tile_k, + uint32_t flags = 0) { + pthreadpool_parallelize_3d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_3d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, tile_j, tile_k, flags); +} + +/** + * Process items on a 4D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * functor(i, j, k, l); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, uint32_t flags = 0) { + pthreadpool_parallelize_4d( + threadpool, &libpthreadpool::detail::call_wrapper_4d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_l, + uint32_t flags = 0) { + pthreadpool_parallelize_4d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, tile_l, flags); +} + +/** + * Process items on a 4D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k += tile_k) + * for (size_t l = 0; l < range_l; l += tile_l) + * functor(i, j, k, l, + * min(range_k - k, tile_k), min(range_l - l, tile_l)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 4D grid. + * @param range_j the number of items to process along the second dimension + * of the 4D grid. + * @param range_k the number of items to process along the third dimension + * of the 4D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 4D grid. + * @param tile_k the maximum number of items along the third dimension of + * the 4D grid to process in one functor call. 
+ * @param tile_l the maximum number of items along the fourth dimension of + * the 4D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t tile_k, + size_t tile_l, + uint32_t flags = 0) { + pthreadpool_parallelize_4d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_4d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, tile_k, tile_l, flags); +} + +/** + * Process items on a 5D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * functor(i, j, k, l, m); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void +pthreadpool_parallelize_5d(pthreadpool_t threadpool, const T &functor, + size_t range_i, size_t range_j, size_t range_k, + size_t range_l, size_t range_m, uint32_t flags = 0) { + pthreadpool_parallelize_5d( + threadpool, &libpthreadpool::detail::call_wrapper_5d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. 
+ * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_m, + uint32_t flags = 0) { + pthreadpool_parallelize_5d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, tile_m, flags); +} + +/** + * Process items on a 5D grid with the specified maximum tile size along the + * last two grid dimensions. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l += tile_l) + * for (size_t m = 0; m < range_m; m += tile_m) + * functor(i, j, k, l, m, + * min(range_l - l, tile_l), min(range_m - m, tile_m)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 5D grid. + * @param range_j the number of items to process along the second dimension + * of the 5D grid. + * @param range_k the number of items to process along the third dimension + * of the 5D grid. 
+ * @param range_l the number of items to process along the fourth dimension + * of the 5D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 5D grid. + * @param tile_l the maximum number of items along the fourth dimension of + * the 5D grid to process in one functor call. + * @param tile_m the maximum number of items along the fifth dimension of + * the 5D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t tile_l, size_t tile_m, + uint32_t flags = 0) { + pthreadpool_parallelize_5d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_5d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, tile_l, tile_m, flags); +} + +/** + * Process items on a 6D grid. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n++) + * functor(i, j, k, l, m, n); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. 
+ * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, uint32_t flags = 0) { + pthreadpool_parallelize_6d( + threadpool, &libpthreadpool::detail::call_wrapper_6d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, flags); +} + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last grid dimension. + * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m++) + * for (size_t n = 0; n < range_n; n += tile_n) + * functor(i, j, k, l, m, n, min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. 
If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. + * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool, + const T &functor, size_t range_i, + size_t range_j, size_t range_k, + size_t range_l, size_t range_m, + size_t range_n, size_t tile_n, + uint32_t flags = 0) { + pthreadpool_parallelize_6d_tile_1d( + threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_1d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, tile_n, flags); +} + +/** + * Process items on a 6D grid with the specified maximum tile size along the + * last two grid dimensions. 
+ * + * The function implements a parallel version of the following snippet: + * + * for (size_t i = 0; i < range_i; i++) + * for (size_t j = 0; j < range_j; j++) + * for (size_t k = 0; k < range_k; k++) + * for (size_t l = 0; l < range_l; l++) + * for (size_t m = 0; m < range_m; m += tile_m) + * for (size_t n = 0; n < range_n; n += tile_n) + * functor(i, j, k, l, m, n, + * min(range_m - m, tile_m), min(range_n - n, tile_n)); + * + * When the function returns, all items have been processed and the thread pool + * is ready for a new task. + * + * @note If multiple threads call this function with the same thread pool, the + * calls are serialized. + * + * @param threadpool the thread pool to use for parallelisation. If threadpool + * is NULL, all items are processed serially on the calling thread. + * @param functor the functor to call for each tile. + * @param range_i the number of items to process along the first dimension + * of the 6D grid. + * @param range_j the number of items to process along the second dimension + * of the 6D grid. + * @param range_k the number of items to process along the third dimension + * of the 6D grid. + * @param range_l the number of items to process along the fourth dimension + * of the 6D grid. + * @param range_m the number of items to process along the fifth dimension + * of the 6D grid. + * @param range_n the number of items to process along the sixth dimension + * of the 6D grid. + * @param tile_m the maximum number of items along the fifth dimension of + * the 6D grid to process in one functor call. + * @param tile_n the maximum number of items along the sixth dimension of + * the 6D grid to process in one functor call. 
+ * @param flags a bitwise combination of zero or more optional flags + * (PTHREADPOOL_FLAG_DISABLE_DENORMALS or PTHREADPOOL_FLAG_YIELD_WORKERS) + */ +template +inline void pthreadpool_parallelize_6d_tile_2d( + pthreadpool_t threadpool, const T &functor, size_t range_i, size_t range_j, + size_t range_k, size_t range_l, size_t range_m, size_t range_n, + size_t tile_m, size_t tile_n, uint32_t flags = 0) { + pthreadpool_parallelize_6d_tile_2d( + threadpool, &libpthreadpool::detail::call_wrapper_6d_tile_2d, + const_cast(static_cast(&functor)), range_i, range_j, + range_k, range_l, range_m, range_n, tile_m, tile_n, flags); +} + +#endif /* __cplusplus */ + +#endif /* PTHREADPOOL_H_ */ diff --git a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h b/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h deleted file mode 100644 index 42a59e94e5..0000000000 --- a/packages/react-native-executorch/third-party/include/tokenizers-cpp/tokenizers_c.h +++ /dev/null @@ -1,61 +0,0 @@ -/*! 
- * Copyright (c) 2023 by Contributors - * \file tokenizers_c.h - * \brief C binding to tokenizers rust library - */ -#ifndef TOKENIZERS_C_H_ -#define TOKENIZERS_C_H_ - -// The C API -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -typedef void *TokenizerHandle; - -typedef struct { - int *token_ids; - size_t len; -} TokenizerEncodeResult; - -TokenizerHandle tokenizers_new_from_str(const char *json, size_t len); - -TokenizerHandle byte_level_bpe_tokenizers_new_from_str( - const char *vocab, size_t vocab_len, const char *merges, size_t merges_len, - const char *added_tokens, size_t added_tokens_len); - -void tokenizers_encode(TokenizerHandle handle, const char *data, size_t len, - int add_special_token, TokenizerEncodeResult *result); - -void tokenizers_encode_batch(TokenizerHandle handle, const char **data, - size_t *len, size_t num_seqs, - int add_special_token, - TokenizerEncodeResult *results); - -void tokenizers_free_encode_results(TokenizerEncodeResult *results, - size_t num_seqs); - -void tokenizers_decode(TokenizerHandle handle, const uint32_t *data, size_t len, - int skip_special_token); - -void tokenizers_get_decode_str(TokenizerHandle handle, const char **data, - size_t *len); - -void tokenizers_get_vocab_size(TokenizerHandle handle, size_t *size); - -void tokenizers_id_to_token(TokenizerHandle handle, uint32_t id, - const char **data, size_t *len); - -// tokenizers_token_to_id stores -1 to *id if the token is not in the vocab -void tokenizers_token_to_id(TokenizerHandle handle, const char *token, - size_t len, int32_t *id); - -void tokenizers_free(TokenizerHandle handle); - -#ifdef __cplusplus -} -#endif -#endif // TOKENIZERS_C_H_ diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj index bfa259f667..eedabc07e0 100644 --- 
a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj +++ b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.pbxproj @@ -21,31 +21,7 @@ 5576B4B72CEF9709005027B7 /* ETModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 5576B4B62CEF9705005027B7 /* ETModel.h */; settings = {ATTRIBUTES = (Public, ); }; }; 5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5576B4B82CEF970C005027B7 /* ETModel.mm */; }; 558699BB2D8AD562004180E5 /* re2.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5576B49E2CEF76CC005027B7 /* re2.xcframework */; }; - 55A815F22D9D40680075A106 /* irunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55A815F12D9D40680075A106 /* irunner.h */; }; 55DEEA382D05ABBB004422A3 /* InputType.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEA372D05ABB4004422A3 /* InputType.h */; }; - 55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */; }; - 55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C262CB90C7A004315B3 /* runner.cpp */; }; - 55EA2C402CB90C7A004315B3 /* runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C272CB90C7A004315B3 /* runner.h */; }; - 55EA2C412CB90C7A004315B3 /* stats.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C282CB90C7A004315B3 /* stats.h */; }; - 55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */; }; - 55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2A2CB90C7A004315B3 /* 
text_decoder_runner.h */; }; - 55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */; }; - 55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */; }; - 55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */; }; - 55EA2C472CB90C7A004315B3 /* util.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C2E2CB90C7A004315B3 /* util.h */; }; - 55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C302CB90C7A004315B3 /* sampler.cpp */; }; - 55EA2C492CB90C7A004315B3 /* sampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C312CB90C7A004315B3 /* sampler.h */; }; - 55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */; }; - 55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C362CB90C7A004315B3 /* base64.h */; }; - 55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */; }; - 55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */; }; - 55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */; }; - 55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */; }; - 55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */; }; - 55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3C2CB90C7A004315B3 /* 
tiktoken.h */; }; - 55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */; }; - 55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */ = {isa = PBXBuildFile; fileRef = 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */; settings = {ATTRIBUTES = (Public, ); }; }; 55EA2C572CB90E7D004315B3 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C562CB90E7D004315B3 /* Accelerate.framework */; }; 55EA2C592CB90E80004315B3 /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C582CB90E80004315B3 /* CoreML.framework */; }; 55EA2C5B2CB90E85004315B3 /* libsqlite3.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */; }; @@ -67,32 +43,8 @@ 5576B49E2CEF76CC005027B7 /* re2.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = re2.xcframework; path = frameworks/re2.xcframework; sourceTree = ""; }; 5576B4B62CEF9705005027B7 /* ETModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ETModel.h; sourceTree = ""; }; 5576B4B82CEF970C005027B7 /* ETModel.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ETModel.mm; sourceTree = ""; }; - 55A815F12D9D40680075A106 /* irunner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = irunner.h; sourceTree = ""; }; 55DEEA372D05ABB4004422A3 /* InputType.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = InputType.h; sourceTree = ""; }; - 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HuggingFaceTokenizer.h; sourceTree = ""; }; - 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = HuggingFaceTokenizer.mm; sourceTree = ""; }; 55EA2C1C2CB90C22004315B3 /* ExecutorchLib.framework */ = {isa = 
PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ExecutorchLib.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - 55EA2C262CB90C7A004315B3 /* runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = runner.cpp; sourceTree = ""; }; - 55EA2C272CB90C7A004315B3 /* runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = runner.h; sourceTree = ""; }; - 55EA2C282CB90C7A004315B3 /* stats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stats.h; sourceTree = ""; }; - 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = ""; }; - 55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = ""; }; - 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_prefiller.cpp; sourceTree = ""; }; - 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = ""; }; - 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_token_generator.h; sourceTree = ""; }; - 55EA2C2E2CB90C7A004315B3 /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; - 55EA2C302CB90C7A004315B3 /* sampler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = ""; }; - 55EA2C312CB90C7A004315B3 /* sampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = sampler.h; sourceTree = ""; }; - 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LLaMARunner.h; sourceTree = ""; }; - 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = LLaMARunner.mm; sourceTree = ""; }; - 55EA2C362CB90C7A004315B3 /* base64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = ""; }; - 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bpe_tokenizer.cpp; sourceTree = ""; }; - 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer.h; sourceTree = ""; }; - 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = llama_tiktoken.cpp; sourceTree = ""; }; - 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = llama_tiktoken.h; sourceTree = ""; }; - 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tiktoken.cpp; sourceTree = ""; }; - 55EA2C3C2CB90C7A004315B3 /* tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = ""; }; - 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = ""; }; 55EA2C562CB90E7D004315B3 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = 
SDKROOT; }; 55EA2C582CB90E80004315B3 /* CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; }; 55EA2C5A2CB90E85004315B3 /* libsqlite3.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.tbd; path = usr/lib/libsqlite3.tbd; sourceTree = SDKROOT; }; @@ -147,68 +99,20 @@ children = ( 55DEEA372D05ABB4004422A3 /* InputType.h */, 55EA2C352CB90C7A004315B3 /* Exported */, - 55EA2C2F2CB90C7A004315B3 /* runner */, - 55EA2C322CB90C7A004315B3 /* sampler */, - 55EA2C3E2CB90C7A004315B3 /* tokenizer */, A851C4042CF9F1B600424E93 /* Utils.hpp */, ); path = ExecutorchLib; sourceTree = ""; }; - 55EA2C2F2CB90C7A004315B3 /* runner */ = { - isa = PBXGroup; - children = ( - 55A815F12D9D40680075A106 /* irunner.h */, - 55EA2C262CB90C7A004315B3 /* runner.cpp */, - 55EA2C272CB90C7A004315B3 /* runner.h */, - 55EA2C282CB90C7A004315B3 /* stats.h */, - 55EA2C292CB90C7A004315B3 /* text_decoder_runner.cpp */, - 55EA2C2A2CB90C7A004315B3 /* text_decoder_runner.h */, - 55EA2C2B2CB90C7A004315B3 /* text_prefiller.cpp */, - 55EA2C2C2CB90C7A004315B3 /* text_prefiller.h */, - 55EA2C2D2CB90C7A004315B3 /* text_token_generator.h */, - 55EA2C2E2CB90C7A004315B3 /* util.h */, - ); - path = runner; - sourceTree = ""; - }; - 55EA2C322CB90C7A004315B3 /* sampler */ = { - isa = PBXGroup; - children = ( - 55EA2C302CB90C7A004315B3 /* sampler.cpp */, - 55EA2C312CB90C7A004315B3 /* sampler.h */, - ); - path = sampler; - sourceTree = ""; - }; 55EA2C352CB90C7A004315B3 /* Exported */ = { isa = PBXGroup; children = ( - 55DEEBF12D8C45990033DBBA /* HuggingFaceTokenizer.mm */, - 55DEEBEF2D8C458F0033DBBA /* HuggingFaceTokenizer.h */, 5576B4B82CEF970C005027B7 /* ETModel.mm */, 5576B4B62CEF9705005027B7 /* ETModel.h */, - 55EA2C332CB90C7A004315B3 /* LLaMARunner.h */, - 55EA2C342CB90C7A004315B3 /* LLaMARunner.mm */, ); path = Exported; 
sourceTree = ""; }; - 55EA2C3E2CB90C7A004315B3 /* tokenizer */ = { - isa = PBXGroup; - children = ( - 55EA2C362CB90C7A004315B3 /* base64.h */, - 55EA2C372CB90C7A004315B3 /* bpe_tokenizer.cpp */, - 55EA2C382CB90C7A004315B3 /* bpe_tokenizer.h */, - 55EA2C392CB90C7A004315B3 /* llama_tiktoken.cpp */, - 55EA2C3A2CB90C7A004315B3 /* llama_tiktoken.h */, - 55EA2C3B2CB90C7A004315B3 /* tiktoken.cpp */, - 55EA2C3C2CB90C7A004315B3 /* tiktoken.h */, - 55EA2C3D2CB90C7A004315B3 /* tokenizer.h */, - ); - path = tokenizer; - sourceTree = ""; - }; 55EA2C552CB90E7D004315B3 /* Frameworks */ = { isa = PBXGroup; children = ( @@ -238,24 +142,9 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( - 55EA2C542CB90E70004315B3 /* LLaMARunner.h in Headers */, 5576B4B72CEF9709005027B7 /* ETModel.h in Headers */, - 55DEEBF02D8C45960033DBBA /* HuggingFaceTokenizer.h in Headers */, - 55EA2C532CB90C7A004315B3 /* tokenizer.h in Headers */, 55DEEA382D05ABBB004422A3 /* InputType.h in Headers */, - 55EA2C412CB90C7A004315B3 /* stats.h in Headers */, - 55EA2C4E2CB90C7A004315B3 /* bpe_tokenizer.h in Headers */, - 55EA2C402CB90C7A004315B3 /* runner.h in Headers */, - 55A815F22D9D40680075A106 /* irunner.h in Headers */, - 55EA2C432CB90C7A004315B3 /* text_decoder_runner.h in Headers */, - 55EA2C492CB90C7A004315B3 /* sampler.h in Headers */, - 55EA2C4C2CB90C7A004315B3 /* base64.h in Headers */, - 55EA2C452CB90C7A004315B3 /* text_prefiller.h in Headers */, - 55EA2C522CB90C7A004315B3 /* tiktoken.h in Headers */, - 55EA2C502CB90C7A004315B3 /* llama_tiktoken.h in Headers */, - 55EA2C472CB90C7A004315B3 /* util.h in Headers */, A851C4072CF9F1B600424E93 /* Utils.hpp in Headers */, - 55EA2C462CB90C7A004315B3 /* text_token_generator.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -266,7 +155,6 @@ isa = PBXNativeTarget; buildConfigurationList = 55EA2C232CB90C22004315B3 /* Build configuration list for PBXNativeTarget "ExecutorchLib" */; buildPhases = ( - 558699CC2D8B1D30004180E5 /* 
Build tokenizers cpp */, 55EA2C172CB90C22004315B3 /* Headers */, 55EA2C182CB90C22004315B3 /* Sources */, 55EA2C192CB90C22004315B3 /* Frameworks */, @@ -327,43 +215,12 @@ }; /* End PBXResourcesBuildPhase section */ -/* Begin PBXShellScriptBuildPhase section */ - 558699CC2D8B1D30004180E5 /* Build tokenizers cpp */ = { - isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - inputFileListPaths = ( - ); - inputPaths = ( - ); - name = "Build tokenizers cpp"; - outputFileListPaths = ( - ); - outputPaths = ( - "$(DERIVED_FILE_DIR)/newOutputFile", - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "set -e\n\nif ! command -v cmake &> /dev/null\nthen\n echo \"Cmake not found, please install Cmake. \\n1. Download Cmake.app from https://cmake.org/download with version > 3.19. \\n2. Install it to Applications/ folder and run `sudo /Applications/CMake.app/Contents/bin/cmake-gui --install` to install CMake commandline tools.\"\n exit 1\nfi\n\n# Type a script or drag a script file from your workspace to insert its path.\nCMAKE_DIR=\"$TEMP_DIR/cmake\"\nrm -rf \"$CMAKE_DIR\"\n\nPLATFORM=\"SIMULATORARM64\"\nDEPLOYMENT_TARGET=\"17.0\"\n\nif [[ \"$PLATFORM_NAME\" == \"iphoneos\" ]]; then\n TOKENIZERS_TARGET=\"aarch64-apple-ios\"\nelif [[ \"$PLATFORM_NAME\" == \"iphonesimulator\" ]]; then\n TOKENIZERS_TARGET=\"x86_64-apple-ios\"\nfi\n\nif [[ \"$PLATFORM_NAME\" == *\"iphoneos\"* ]]; then\n PLATFORM=\"OS64\"\nelif [[ \"$PLATFORM_NAME\" == *\"macos\"* ]]; then\n PLATFORM=\"MAC_ARM64\"\n DEPLOYMENT_TARGET=\"10.15\"\nfi\n\n\ncmake_build_tokenizers() {\n export PATH=\"$PATH:$HOME/.cargo/bin\"\n export PATH=\"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib:$PATH\"\n export LIBRARY_PATH=\"$LIBRARY_PATH:/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib\"\n local src_dir=$1\n local target=$2\n local extra_args=(\"$@\")\n local build_dir=\"$CMAKE_DIR/build/$(basename \"$src_dir\")\"\n mkdir -p \"$build_dir\" && cd 
\"$build_dir\"\n \n if [[ \"$PLATFORM\" == \"MAC_ARM64\" ]]; then\n extra_args+=(-DCMAKE_INSTALL_BUNDLEDIR=\"${CMAKE_DIR}/bin\")\n extra_args+=(-DCMAKE_MACOSX_BUNDLE=OFF)\n fi\n \n cmake \"$src_dir\" -DCMAKE_TOOLCHAIN_FILE=\"$SRCROOT/../../ios/ios.toolchain.cmake\" \\\n -DPLATFORM=\"$PLATFORM\" \\\n -DDEPLOYMENT_TARGET=\"$DEPLOYMENT_TARGET\" \\\n -DENABLE_BITCODE=FALSE \\\n -DENABLE_ARC=TRUE \\\n -DENABLE_VISIBILITY=TRUE \\\n -DCMAKE_INSTALL_PREFIX=\"$CMAKE_DIR\" \\\n -DMLC_ENABLE_SENTENCEPIECE_TOKENIZER=ON\n \n cmake --build . --config \"Release\" --target \"install\"\n}\n\ncmake_build_tokenizers \"$SRCROOT/../../../../../third-party/tokenizers-cpp\" \"install\"\n\necho \"$(find $CMAKE_DIR/lib -name \"*.a\" | sed -E 's|^.*/lib([^/]+)\\.a|-l\\1|g' | tr '\\n' ' ')\" > \"$CMAKE_DIR/linker_flags\"\n\n"; - }; -/* End PBXShellScriptBuildPhase section */ - /* Begin PBXSourcesBuildPhase section */ 55EA2C182CB90C22004315B3 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 55EA2C482CB90C7A004315B3 /* sampler.cpp in Sources */, - 55EA2C3F2CB90C7A004315B3 /* runner.cpp in Sources */, - 55EA2C422CB90C7A004315B3 /* text_decoder_runner.cpp in Sources */, - 55EA2C4D2CB90C7A004315B3 /* bpe_tokenizer.cpp in Sources */, - 55EA2C4F2CB90C7A004315B3 /* llama_tiktoken.cpp in Sources */, 5576B4B92CEF970E005027B7 /* ETModel.mm in Sources */, - 55EA2C442CB90C7A004315B3 /* text_prefiller.cpp in Sources */, - 55EA2C512CB90C7A004315B3 /* tiktoken.cpp in Sources */, - 55DEEBF22D8C459A0033DBBA /* HuggingFaceTokenizer.mm in Sources */, - 55EA2C4B2CB90C7A004315B3 /* LLaMARunner.mm in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -542,7 +399,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); "OTHER_LDFLAGS[sdk=iphonesimulator*]" = ( "$(inherited)", @@ -560,7 +416,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a", "-force_load", 
"$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; @@ -624,7 +479,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_ios.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_ios.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); "OTHER_LDFLAGS[sdk=iphonesimulator*]" = ( "$(inherited)", @@ -642,7 +496,6 @@ "$(BUILT_PRODUCTS_DIR)/libbackend_mps_simulator.a", "-force_load", "$(BUILT_PRODUCTS_DIR)/libexecutorch_simulator.a", - "@$(TEMP_DIR)/cmake/linker_flags", ); PRODUCT_BUNDLE_IDENTIFIER = com.swmansion.Executorch; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h deleted file mode 100644 index 4332cf811d..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#import - -@interface HuggingFaceTokenizer : NSObject - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath; -- (NSArray *)encode:(NSString *)text; -- (NSString *)decode:(NSArray *)tokenIds; -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens; -- (NSUInteger)getVocabSize; -- (NSString *)idToToken:(NSInteger)tokenId; -- (NSInteger)tokenToId:(NSString *)token; - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm deleted file mode 100644 index 38d7cdfd09..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/HuggingFaceTokenizer.mm +++ /dev/null @@ -1,80 +0,0 @@ -#import 
"HuggingFaceTokenizer.h" -#include -#include -#include -#include -#include - -std::string loadBytesFromFile(const std::string &path) { - std::ifstream fs(path, std::ios::in | std::ios::binary); - if (fs.fail()) { - throw std::runtime_error("Failed to open tokenizer file"); - } - std::string data; - fs.seekg(0, std::ios::end); - size_t size = static_cast(fs.tellg()); - fs.seekg(0, std::ios::beg); - data.resize(size); - fs.read(data.data(), size); - return data; -} - -@implementation HuggingFaceTokenizer { - std::unique_ptr _tokenizer; -} - -- (instancetype)initWithTokenizerPath:(NSString *)tokenizerPath { - self = [super init]; - if (self) { - auto blob = loadBytesFromFile([tokenizerPath UTF8String]); - _tokenizer = tokenizers::Tokenizer::FromBlobJSON(blob); - } - return self; -} - -- (NSArray *)encode:(NSString *)text { - std::vector result = _tokenizer->Encode([text UTF8String]); - NSMutableArray *encodedResult = - [[NSMutableArray alloc] initWithCapacity:result.size()]; - for (int32_t tokenId : result) { - [encodedResult addObject:@(tokenId)]; - } - - return encodedResult; -} - -- (NSString *)decode:(NSArray *)tokenIds { - return [self decode:tokenIds skipSpecialTokens:NO]; -} - -- (NSString *)decode:(NSArray *)tokenIds - skipSpecialTokens:(BOOL)skipSpecialTokens { - std::vector stdTokenIds; - stdTokenIds.reserve([tokenIds count]); - for (NSNumber *tokenId in tokenIds) { - stdTokenIds.push_back([tokenId intValue]); - } - std::string decodedString = - _tokenizer->Decode(stdTokenIds, skipSpecialTokens); - return [NSString stringWithUTF8String:decodedString.c_str()]; -} - -- (NSUInteger)getVocabSize { - return (NSUInteger)_tokenizer->GetVocabSize(); -} - -- (NSString *)idToToken:(NSInteger)tokenId { - std::string token = _tokenizer->IdToToken(static_cast(tokenId)); - return [NSString stringWithUTF8String:token.c_str()]; -} - -- (NSInteger)tokenToId:(NSString *)token { - std::string stdToken = [token UTF8String]; - return (NSInteger)_tokenizer->TokenToId(stdToken); 
-} - -- (void)dealloc { - _tokenizer.reset(); -} - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h deleted file mode 100644 index d8638cfa6a..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#import - -NS_ASSUME_NONNULL_BEGIN - -FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain; - -NS_SWIFT_NAME(Runner) -@interface LLaMARunner : NSObject - -- (instancetype)initWithModelPath:(NSString *)filePath - tokenizerPath:(NSString *)tokenizerPath; -- (BOOL)isLoaded; -- (BOOL)loadWithError:(NSError **)error; -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error; -- (void)stop; - -+ (instancetype)new NS_UNAVAILABLE; -- (instancetype)init NS_UNAVAILABLE; - -@end - -NS_ASSUME_NONNULL_END diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm deleted file mode 100644 index b50bfb7b01..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/Exported/LLaMARunner.mm +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#import "LLaMARunner.h" - -#import "runner.h" -#import - -using namespace ::torch::executor; - -NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain"; - -@interface LLaMARunner () -@end - -@implementation LLaMARunner { - std::unique_ptr _runner; -} - -- (instancetype)initWithModelPath:(NSString *)modelPath - tokenizerPath:(NSString *)tokenizerPath { - self = [super init]; - if (self) { - [ExecuTorchLog.sharedLog addSink:self]; - _runner = std::make_unique(modelPath.UTF8String, - tokenizerPath.UTF8String); - } - return self; -} - -- (void)dealloc { - [ExecuTorchLog.sharedLog removeSink:self]; -} - -- (BOOL)isLoaded { - return _runner->is_loaded(); -} - -- (BOOL)loadWithError:(NSError **)error { - const auto status = _runner->load(); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - } - return NO; - } - return YES; -} - -- (BOOL)generate:(NSString *)prompt - withTokenCallback:(nullable void (^)(NSString *))callback - error:(NSError **)error { - const auto status = _runner->generate( - prompt.UTF8String, - [callback](const std::string &token) { callback(@(token.c_str())); }); - if (status != Error::Ok) { - if (error) { - *error = [NSError errorWithDomain:LLaMARunnerErrorDomain - code:(NSInteger)status - userInfo:nil]; - return NO; - } - } - return YES; -} - -- (void)stop { - _runner->stop(); -} - -#pragma mark - ExecuTorchLogSink - -- (void)logWithLevel:(ExecuTorchLogLevel)level - timestamp:(NSTimeInterval)timestamp - filename:(NSString *)filename - line:(NSUInteger)line - message:(NSString *)message { - NSUInteger totalSeconds = (NSUInteger)timestamp; - NSUInteger hours = (totalSeconds / 3600) % 24; - NSUInteger minutes = (totalSeconds / 60) % 60; - NSUInteger seconds = totalSeconds % 60; - NSUInteger microseconds = (timestamp - totalSeconds) * 1000000; - NSLog(@"%c %02lu:%02lu:%02lu.%06lu executorch:%s:%zu] %s", (char)level, hours, - minutes, 
seconds, microseconds, filename.UTF8String, line, - message.UTF8String); -} - -@end diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h deleted file mode 100644 index 722fe3900e..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/base64.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ -// @lint-ignore-every LICENSELINT -/************************************************************************** - Copyright (c) 2023 sewenew - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- *************************************************************************/ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace executorch { -namespace extension { -namespace llm { -using Error = executorch::runtime::Error; -template using Result = executorch::runtime::Result; - -namespace base64 { - -Result decode(const std::string_view &input); - -namespace detail { - -constexpr uint32_t DECODE_TABLE[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, - 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -inline Error validate(uint32_t v) { - ET_CHECK_OR_RETURN_ERROR(v != 255, InvalidArgument, "invalid char"); - return Error::Ok; -} - -inline Error decode(const std::string_view &input, std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 4, InvalidArgument, - "input length must be 4, got 
%zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[2]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[3]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 16) & 0xFF)); - output.push_back(static_cast((val >> 8) & 0xFF)); - output.push_back(static_cast(val & 0xFF)); - return Error::Ok; -} - -inline Error decode_1_padding(const std::string_view &input, - std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 3, InvalidArgument, - "input length must be 3, got %zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - c = input[2]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 10) & 0xFF)); - output.push_back(static_cast((val >> 2) & 0xFF)); - return Error::Ok; -} - -inline Error decode_2_padding(const std::string_view &input, - std::string &output) { - ET_CHECK_OR_RETURN_ERROR(input.size() == 2, InvalidArgument, - "input length must be 2, got %zu", input.size()); - - uint32_t val = 0; - - uint8_t c = input[0]; - auto v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = v; - - c = input[1]; - v = DECODE_TABLE[c]; - ET_CHECK_OK_OR_RETURN_ERROR(validate(v)); - val = (val << 6) | v; - - output.push_back(static_cast((val >> 4) & 0xFF)); - return Error::Ok; -} - -} // namespace detail - -inline Result decode(const std::string_view &input) { - ET_CHECK_OR_RETURN_ERROR(!input.empty(), InvalidArgument, 
"empty input"); - - // Faster than `input.size() % 4`. - ET_CHECK_OR_RETURN_ERROR( - (input.size() & 3) == 0 && input.size() >= 4, InvalidArgument, - "input length must be larger than 4 and is multiple of 4, got %zu", - input.size()); - - std::string output; - output.reserve(input.size() / 4 * 3); - auto idx = 0U; - for (; idx < input.size() - 4; idx += 4) { - ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output)); - } - - // Last 4 bytes. Might contain paddings. - if (input[idx + 3] == '=') { - if (input[idx + 2] == '=') { - // Tow paddings. - ET_CHECK_OK_OR_RETURN_ERROR( - detail::decode_2_padding(input.substr(idx, 2), output)); - } else { - // One padding. - ET_CHECK_OK_OR_RETURN_ERROR( - detail::decode_1_padding(input.substr(idx, 3), output)); - } - } else { - // No padding. - ET_CHECK_OK_OR_RETURN_ERROR(detail::decode(input.substr(idx, 4), output)); - } - - return output; -} - -} // namespace base64 - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -namespace base64 { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::base64::decode; -} // namespace base64 -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp deleted file mode 100644 index aa0a6d1baa..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.cpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "bpe_tokenizer.h" - -#include - -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace executorch { -namespace extension { -namespace llm { - -static int compare_tokens(const void *a, const void *b) { - if (((TokenIndex *)a)->str == nullptr) { - return -1; - } - if (((TokenIndex *)b)->str == nullptr) { - return 1; - } - return strcmp(((TokenIndex *)a)->str, ((TokenIndex *)b)->str); -} - -BPETokenizer::BPETokenizer() : Tokenizer() { - for (int i = 0; i < 256; i++) { - byte_pieces_[i * 2] = (unsigned char)i; - byte_pieces_[i * 2 + 1] = '\0'; - } -} - -/** - * @brief Load the tokenizer from a file. The tokenizer file contains the - * vocabulary and scores. The format is: the first integer is the maximum - * token length, followed by a list of (word_len, word) pairs. Here we - * are reading all the vocabulary into memory and keep it sorted for fast - * lookup. - * - * @param tokenizer_path The path to the tokenizer file. - * @return Error - */ -Error BPETokenizer::load(const std::string &tokenizer_path) { - if (initialized_) { - ET_LOG(Info, "Tokenizer already initialized"); - return Error::Ok; - } - // read in the file - FILE *file = fopen(tokenizer_path.c_str(), "rb"); - if (!file) { - ET_LOG(Error, "couldn't load %s", tokenizer_path.c_str()); - return Error::InvalidArgument; - } - int32_t metadata[4]; - for (int i = 0; i < 4; i++) { - if (fread(metadata + i, sizeof(int32_t), 1, file) != 1) { - ET_LOG(Error, - "Failed to read the metadata at position %d, the tokenizer file " - "is not valid!", - i); - return Error::InvalidArgument; - } - } - - // now we have two vocab_sizes one from the model and another from the - // tokenizer file. 
- int32_t tokenizer_vocab_size = metadata[0]; - vocab_size_ = tokenizer_vocab_size; - bos_tok_ = metadata[1]; - eos_tok_ = metadata[2]; - max_token_length_ = metadata[3]; - - // allocate space for the vocabulary - vocab_ = std::make_unique(vocab_size_); - vocab_scores_ = std::make_unique(vocab_size_); - sorted_vocab_ = std::make_unique(vocab_size_); - - // read in the vocabulary - for (int i = 0; i < vocab_size_; i++) { - if (fread(vocab_scores_.get() + i, sizeof(float), 1, file) != 1) { - // This is allowed, we just pad the rest of the vocab with strings - std::string padding = ""; - vocab_[i] = new char[padding.length() + 1]; - strcpy(vocab_[i], padding.c_str()); - vocab_[i][padding.length()] = '\0'; - continue; - } - int32_t len; - if (fread(&len, sizeof(int32_t), 1, file) != 1) { - ET_LOG(Error, "Failed to read the length of the word at index %d", i); - return Error::InvalidArgument; - } - vocab_[i] = new char[len + 1]; - if (fread(vocab_[i], len, 1, file) != 1) { - ET_LOG(Error, "Failed to read the word, total length %d, index %d\n", len, - i); - return Error::InvalidArgument; - } - vocab_[i][len] = '\0'; // add the string terminating token - } - fclose(file); - - for (int32_t i = 0; i < vocab_size_; i++) { - sorted_vocab_[i].str = vocab_[i]; - sorted_vocab_[i].id = i; - } - qsort(sorted_vocab_.get(), vocab_size_, sizeof(TokenIndex), compare_tokens); - - initialized_ = true; - return Error::Ok; -} - -BPETokenizer::~BPETokenizer() { - for (int i = 0; i < vocab_size_; i++) { - delete[] vocab_[i]; - } -} - -/** - * @brief Decode a token into string. - * - * @param prev_token The previous token. - * @param token The current token. - * @return Result A pointer to the string representation of the - * token. 
- */ -Result BPETokenizer::decode(uint64_t prev_token, - uint64_t token) const { - ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(token)); - const char *piece = vocab_[token]; - // following BOS token, sentencepiece decoder strips any leading - // whitespace - if (prev_token == bos_tok_ && piece[0] == ' ') { - piece++; - } - // careful, some tokens designate raw bytes, and look like e.g. '<0x01>' - // parse this and convert and return the actual byte - unsigned char byte_val; - if (sscanf(piece, "<0x%02hhX>", &byte_val) == 1) { - piece = (char *)byte_pieces_ + byte_val * 2; - } - std::string res(piece); - return res; -} - -static int32_t str_lookup(const char *str, TokenIndex *sorted_vocab, - int32_t vocab_size) { - // efficiently find the perfect match for str in vocab, return its index or -1 - // if not found - TokenIndex tok = {.str = str}; // acts as the key to search for - TokenIndex *res = (TokenIndex *)bsearch(&tok, sorted_vocab, vocab_size, - sizeof(TokenIndex), compare_tokens); - return res != nullptr ? res->id : -1; -} - -/** - * @brief Encode a string into a sequence of tokens. - * - * @param text The string to be encoded. - * @param bos The number of BOS to prepend to the token list. - * @param eos The number of EOS to append to the token list. - * @param tokens The output tokens. - * @param n_tokens The number of tokens. 
- * @return Result> - */ -Result> -BPETokenizer::encode(const std::string &text, int8_t bos, int8_t eos) const { - if (!initialized_) { - ET_LOG(Error, "Tokenizer not initialized"); - return Error::NotSupported; - } - // encode the string text (input) into an upper-bound preallocated tokens[] - // array bos != 0 means prepend the BOS token (=1), eos != 0 means append the - // EOS token (=2) - if (text.empty()) { - ET_LOG(Error, "cannot encode empty text"); - return Error::InvalidArgument; - } - - // create a temporary buffer that will store merge candidates of always two - // consecutive tokens *2 for concat, +1 for null terminator +2 for UTF8 (in - // case max_token_length is 1) - char *str_buffer = new char[max_token_length_ * 2 + 1 + 2]; - size_t str_len = 0; - - // start at 0 tokens - std::vector tokens; - - // add optional BOS token, if desired - if (bos >= 0) { - while (bos--) { - tokens.push_back(bos_tok_); - } - } else { - ET_LOG(Error, "bos %d should be >= 0", bos); - return Error::InvalidArgument; - } - - // add_dummy_prefix is true by default - // so prepend a dummy prefix token to the input string, but only if text != "" - // TODO: pretty sure this isn't correct in the general case but I don't have - // the energy to read more of the sentencepiece code to figure out what it's - // doing - const char *space = " "; - if (text[0] != '\0') { - int dummy_prefix = str_lookup(space, sorted_vocab_.get(), vocab_size_); - tokens.push_back(dummy_prefix); - } - - // Okay UTF-8 time. This will get messy. 
Here is the reference from Wikipedia: - // Code point ↔ UTF-8 conversion - // First code point Last code point Byte 1 Byte 2 Byte 3 Byte 4 - // U+0000 U+007F 0xxxxxxx - // U+0080 U+07FF 110xxxxx 10xxxxxx - // U+0800 U+FFFF 1110xxxx 10xxxxxx 10xxxxxx - // U+10000 U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - - // process the raw (UTF-8) byte sequence of the input string - for (const char *c = text.c_str(); *c != '\0'; c++) { - // reset buffer if the current byte is ASCII or a leading byte - // 0xC0 is 11000000, so (*c & 0xC0) keeps the first 2 bits and zeros the - // rest 0x80 is 10000000 in UTF-8, all continuation bytes start with "10" in - // first two bits so in English this is: "if this byte is not a continuation - // byte" - if ((*c & 0xC0) != 0x80) { - // this byte must be either a leading byte (11...) or an ASCII char - // (0x...) - // => reset our location, as we're starting a new UTF-8 codepoint - str_len = 0; - } - - // append the current byte to the buffer - str_buffer[str_len++] = - *c; // ++ is post-increment, incremented after this line - str_buffer[str_len] = '\0'; - - // while the next character is a continuation byte, continue appending - // but if there are too many of them, just stop to avoid overruning - // str_buffer size. 
- if ((*(c + 1) & 0xC0) == 0x80 && str_len < 4) { - continue; - } - - // ok c+1 is not a continuation byte, so we've read in a full codepoint - int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_); - if (id != -1) { - // we found this codepoint in vocab, add it as a token - tokens.push_back(id); - } else { - // byte_fallback encoding: just encode each byte as a token - // +3 is here because the first 3 vocab elements are , , - // so the individual bytes only start at index 3 - for (int i = 0; i < str_len; i++) { - tokens.push_back((unsigned char)str_buffer[i] + 3); - } - } - str_len = 0; // protect against a sequence of stray UTF8 continuation bytes - } - - // merge the best consecutive pair each iteration, according the scores in - // vocab_scores - while (1) { - float best_score = -1e10; - int best_id = -1; - int best_idx = -1; - - for (int i = 0; i < tokens.size() - 1; i++) { - // check if we can merge the pair (tokens[i], tokens[i+1]) - snprintf(str_buffer, max_token_length_ * 2 + 3, "%s%s", vocab_[tokens[i]], - vocab_[tokens[i + 1]]); - int id = str_lookup(str_buffer, sorted_vocab_.get(), vocab_size_); - if (id != -1 && vocab_scores_[id] > best_score) { - // this merge pair exists in vocab! 
record its score and position - best_score = vocab_scores_[id]; - best_id = id; - best_idx = i; - } - } - - if (best_idx == -1) { - break; // we couldn't find any more pairs to merge, so we're done - } - - // merge the consecutive pair (best_idx, best_idx+1) into new token best_id - tokens[best_idx] = best_id; - // delete token at position best_idx+1, shift the entire sequence back 1 - for (int i = best_idx + 1; i < tokens.size() - 1; i++) { - tokens[i] = tokens[i + 1]; - } - tokens.pop_back(); // token length decreased - } - - // add optional EOS (=2) token, if desired - if (eos >= 0) { - while (eos--) { - tokens.push_back(eos_tok_); - } - } else { - ET_LOG(Error, "eos %d should be >= 0", eos); - return Error::InvalidArgument; - } - - delete[] str_buffer; - return Result(tokens); -} - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h deleted file mode 100644 index b619905793..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/bpe_tokenizer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tokenizer.h" -#include - -namespace executorch { -namespace extension { -namespace llm { - -struct TokenIndex { - const char *str; - int32_t id; -}; - -// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code -// won't work with this class, it needs to go through tokenizer.py first. 
-class BPETokenizer : public Tokenizer { -public: - explicit BPETokenizer(); - ~BPETokenizer() override; - - ::executorch::runtime::Error load(const std::string &tokenizer_path) override; - - ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const override; - - ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const override; - -private: - std::unique_ptr vocab_ = nullptr; - std::unique_ptr vocab_scores_ = nullptr; - std::unique_ptr sorted_vocab_ = nullptr; - unsigned int max_token_length_ = 0; - unsigned char byte_pieces_[512]; // stores all single-byte strings -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::BPETokenizer; -using ::executorch::extension::llm::TokenIndex; -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp deleted file mode 100644 index 8bc7ef4879..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "llama_tiktoken.h" - -namespace example { - -using ::executorch::extension::llm::Tiktoken; - -namespace { -static constexpr int32_t kSpecialTokensSize = 256; -static constexpr size_t kBOSTokenIndex = 0; -static constexpr size_t kEOSTokenIndex = 1; - -static inline std::unique_ptr> -_get_default_special_tokens() { - auto special_tokens = - std::make_unique>(std::vector{ - "<|begin_of_text|>", "<|end_of_text|>", - "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", - "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>", - "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>"}); - // pad the rest of the special tokens with reserved tokens - ssize_t reserved_special_token_num = 2; - while (special_tokens->size() < kSpecialTokensSize) { - special_tokens->emplace_back("<|reserved_special_token_" + - std::to_string(reserved_special_token_num++) + - "|>"); - } - return special_tokens; -} - -static inline std::unique_ptr> -_get_multimodal_special_tokens() { - auto special_tokens = - std::make_unique>(std::vector{ - "<|begin_of_text|>", "<|end_of_text|>", - "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", - "<|reserved_special_token_2|>", "<|reserved_special_token_3|>", - "<|start_header_id|>", "<|end_header_id|>", "<|eom_id|>", - "<|eot_id|>", "<|image|>"}); - - // pad the rest of the special tokens with reserved tokens except the last - // one - ssize_t reserved_special_token_num = 4; - while (special_tokens->size() < kSpecialTokensSize - 1) { - special_tokens->emplace_back("<|reserved_special_token_" + - std::to_string(reserved_special_token_num++) + - "|>"); - } - - special_tokens->emplace_back("<|python_tag|>"); - - return special_tokens; -} - -std::unique_ptr> _get_special_tokens(Version version) { - switch (version) { - case Version::Multimodal: - return _get_multimodal_special_tokens(); - default: - return _get_default_special_tokens(); - } -} - -} // namespace - -std::unique_ptr 
get_tiktoken_for_llama(Version version) { - return std::make_unique(_get_special_tokens(version), - kBOSTokenIndex, kEOSTokenIndex); -} - -} // namespace example diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h deleted file mode 100644 index 10e106f116..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/llama_tiktoken.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tiktoken.h" - -namespace example { - -enum class Version { - Default, - Multimodal, -}; - -std::unique_ptr<::executorch::extension::llm::Tiktoken> -get_tiktoken_for_llama(Version version = Version::Default); - -} // namespace example diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp deleted file mode 100644 index aad4de6f75..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -// Adopted from https://github.com/sewenew/tokenizer - -// @lint-ignore-every LICENSELINT -/************************************************************************** - Copyright (c) 2023 sewenew - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - *************************************************************************/ - -#include "tiktoken.h" -#include "base64.h" -#include -#include -#include - -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace executorch { -namespace extension { -namespace llm { - -// ------------------------------Util start------------------------------------ - -static uint64_t _max_size() { return std::numeric_limits::max(); } - -static Re2UPtr _create_regex(const std::string &pattern) { - assert(!pattern.empty()); - - return std::make_unique("(" + pattern + ")"); -} - -static Re2UPtr _build_special_token_regex(const Encoder &special_encoder) { - std::string special_pattern; - for (const auto &ele : special_encoder) { - if (!special_pattern.empty()) { - special_pattern += "|"; - } - special_pattern += re2::RE2::QuoteMeta(ele.first); - } - - if (special_pattern.empty()) { - return nullptr; - } - - return _create_regex(special_pattern); -} - -static Result> -_parse(const std::string &line) { - // Tiktoken format - // https://github.com/openai/tiktoken/blob/main/tiktoken/load.py#L140 - auto pos = line.find(" "); - ET_CHECK_OR_RETURN_ERROR(pos != std::string::npos, InvalidArgument, - "invalid tiktoken line: %s", 
line.c_str()); - - auto token = ET_UNWRAP(base64::decode({line.data(), pos})); - uint64_t rank = 0; - try { - rank = std::stoul(line.substr(pos + 1)); - } catch (const std::exception &) { - ET_CHECK_OR_RETURN_ERROR(false, InvalidArgument, "invalid encoder rank: %s", - line.c_str()); - } - - return std::pair{std::move(token), rank}; -} - -static Result _load_encoder(const std::string &path) { - std::ifstream file(path); - ET_CHECK_OR_RETURN_ERROR(file, InvalidArgument, - "failed to open encoder file: %s", path.c_str()); - - Encoder encoder; - std::string line; - while (std::getline(file, line)) { - auto [token, rank] = ET_UNWRAP(_parse(line)); - - ET_CHECK_OR_RETURN_ERROR(encoder.emplace(std::move(token), rank).second, - InvalidArgument, "duplicate item: %s", - line.c_str()); - } - - return encoder; -} - -static Result _build_decoder(const Encoder &encoder) { - Decoder decoder; - for (const auto &[k, v] : encoder) { - decoder.emplace(v, k); - } - - ET_CHECK_OR_RETURN_ERROR(encoder.size() == decoder.size(), InvalidArgument, - "duplicate items in encoder"); - - return decoder; -} - -static std::vector -_byte_pair_merge(const std::string &piece, - const std::unordered_map &ranks, - std::function func) { - // This is a vector of (start, rank). - // The rank is of the byte pair starting at position start. - // The rank of the last item in the vector is not a valid value. 
- std::vector> parts; - parts.reserve(piece.size() + 1); - for (auto idx = 0U; idx < piece.size() + 1; ++idx) { - parts.emplace_back(idx, _max_size()); - } - - auto get_rank = - [&piece, &ranks](const std::vector> &parts, - uint64_t start_idx, - uint64_t skip) -> std::optional { - if (start_idx + skip + 2 < parts.size()) { - auto s = parts[start_idx].first; - auto e = parts[start_idx + skip + 2].first; - auto key = piece.substr(s, e - s); - auto iter = ranks.find(key); - if (iter != ranks.end()) { - return iter->second; - } - } - return std::nullopt; - }; - - // We look up the ranks once in the beginning and iteratively update - // them during each merge, which reduces the number of rank lookups. - for (auto i = 0U; i < parts.size() - 2; ++i) { - auto rank = get_rank(parts, i, 0); - if (rank) { - // usize::MAX is a sentinel value and cannot be a valid rank - ET_CHECK_MSG(*rank != _max_size(), "rank is too large"); - parts[i].second = *rank; - } - } - - // If you have n parts and m merges, this does O(mn) work. - // We could do something with a heap and do O(m log n) work. - // It is important to consider that n is often small (<100), and as such - // the cache-locality benefits outweigh the algorithmic complexity downsides - // of the `parts` vector data structure above. - - // Note that we hash bytes, not token pairs. As long as we train BPE the way - // we currently do, this is equivalent. An easy way to break this would be - // to decouple merge priority from token index or to prevent specific token - // merges. 
- while (true) { - if (parts.size() == 1) { - break; - } - - // usize::MAX is a sentinel rank value allowing us to - // take the min more quickly - auto min_rank = std::make_pair(_max_size(), 0); - for (auto i = 0U; i < parts.size() - 1; ++i) { - auto rank = parts[i].second; - if (rank < min_rank.first) { - min_rank.first = rank; - min_rank.second = i; - } - } - - if (min_rank.first != _max_size()) { - auto i = min_rank.second; - - // NOTE: We are about to remove parts[i + 1]. We do not do it - // yet because there are cache-locality benefits to updating - // parts[i] and parts[i-1] before removing, which could thrash - // the cache. Thus, we update the rank calculation by skipping over - // parts[i + 1], by invoking `get_rank!` with `skip = 1`. - auto rank = get_rank(parts, i, 1); - if (rank) { - parts[i].second = *rank; - } else { - parts[i].second = _max_size(); - } - if (i > 0) { - rank = get_rank(parts, i - 1, 1); - if (rank) { - parts[i - 1].second = *rank; - } else { - parts[i - 1].second = _max_size(); - } - } - - parts.erase(parts.begin() + (i + 1)); - } else { - break; - } - } - std::vector out; - out.reserve(parts.size() - 1); - for (auto i = 0U; i < parts.size() - 1; ++i) { - auto s = parts[i].first; - auto e = parts[i + 1].first; - out.push_back(func(s, e)); - } - return out; -} - -static std::vector _byte_pair_encode(const std::string &piece, - const Encoder &encoder) { - if (piece.size() == 1) { - auto iter = encoder.find(piece); - if (iter != encoder.end()) { - return std::vector({iter->second}); - } else { - // TODO: is it possible? - return {}; - } - } - - return _byte_pair_merge(piece, encoder, - [&piece, &encoder](uint64_t start, uint64_t stop) { - std::string key = piece.substr(start, stop - start); - auto iter = encoder.find(key); - if (iter != encoder.end()) { - return iter->second; - } else { - // TODO: what if key does not exist? Should we - // return `unknown`? assert(false); // ?? 
- return uint64_t(0); - } - }); -} -// ------------------------------Util end------------------------------------ -// -------------------------private method start------------------------------- - -template -std::pair, re2::StringPiece> -Tiktoken::_split_with_allowed_special_token(re2::StringPiece &input, - const T &allowed_special) const { - if (!_special_token_regex) { - return std::make_pair(std::nullopt, input); - } - -#if __cplusplus >= 202002L - auto start = input.begin(); -#else - const char *start = input.data(); -#endif - std::string special; - while (true) { - if (!re2::RE2::FindAndConsume(&input, *_special_token_regex, &special)) { - // No special token. - break; - } - - if (allowed_special.count(special) == 1) { - // Found an allowed special token, split the text with it. -#if __cplusplus >= 202002L - return std::make_pair( - special, - re2::StringPiece(start, input.begin() - start - special.size())); -#else - return std::make_pair( - special, - re2::StringPiece(start, (input.data() - start) - special.size())); -#endif - } // else try to find the next special token - } - - return std::make_pair(std::nullopt, input); -} - -void Tiktoken::_encode(re2::StringPiece &input, std::vector &ret, - uint64_t &last_piece_token_len) const { - std::string piece; - assert(_regex); - while (re2::RE2::FindAndConsume(&input, *_regex, &piece)) { - auto iter = _encoder.find(piece); - if (iter != _encoder.end()) { - last_piece_token_len = 1; - ret.push_back(iter->second); - continue; - } - auto tokens = _byte_pair_encode(piece, _encoder); - last_piece_token_len = tokens.size(); - ret.insert(ret.end(), tokens.begin(), tokens.end()); - } -} - -template -std::pair, uint64_t> -Tiktoken::_encode_with_special_token(const std::string &text, - const T &allowed_special) const { - std::vector tokens; - uint64_t last_piece_token_len = 0; - re2::StringPiece input(text); - while (true) { - auto [special, sub_input] = - _split_with_allowed_special_token(input, allowed_special); - - 
_encode(sub_input, tokens, last_piece_token_len); - - if (special) { - uint64_t token = 0; - try { - token = _special_token_encoder.at(*special); - } catch (const std::out_of_range &) { - // Should never go here, since special pattern includes all special - // chars. - ET_CHECK_MSG(false, "unknown special token: %s", special->c_str()); - } - - tokens.push_back(token); - last_piece_token_len = 0; - } else { - break; - } - } - - // last_piece_token_len is how many tokens came from the last regex split. - // This is used for determining unstable tokens, since you can't merge - // across (stable) regex splits - return std::make_pair(tokens, last_piece_token_len); -} - -Encoder Tiktoken::_build_special_token_encoder(ssize_t num_base_tokens) const { - Encoder special_token_encoder; - for (ssize_t i = 0; i < _special_tokens->size(); ++i) { - special_token_encoder.emplace(_special_tokens->at(i), num_base_tokens + i); - } - return special_token_encoder; -} - -// -------------------------private method end------------------------------- -// -------------------------public method start------------------------------- - -Tiktoken::Tiktoken(std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index) - : Tokenizer(), _special_tokens(std::move(special_tokens)), - _bos_token_index(bos_token_index), _eos_token_index(eos_token_index) { - ET_CHECK_MSG(_bos_token_index < _special_tokens->size(), - "invalid bos_token_index %zu", _bos_token_index); - ET_CHECK_MSG(_eos_token_index < _special_tokens->size(), - "invalid eos_token_index %zu", _eos_token_index); -} - -Error Tiktoken::load(const std::string &path) { - _encoder = ET_UNWRAP(_load_encoder(path)); - _special_token_encoder = _build_special_token_encoder(_encoder.size()); - - _decoder = ET_UNWRAP(_build_decoder(_encoder)); - _special_token_decoder = ET_UNWRAP(_build_decoder(_special_token_encoder)); - - _regex = _create_regex(_pattern); - // Warmup re2 as it is slow on the first run, void the return value as 
it's - // not needed Refer to - // https://github.com/google/re2/blob/6dcd83d60f7944926bfd308cc13979fc53dd69ca/re2/fuzzing/re2_fuzzer.cc#L136-L141 - (void)_regex->ReverseProgramSize(); - - _special_token_regex = _build_special_token_regex(_special_token_encoder); - // Same as above, warm up re2 - (void)_special_token_regex->ReverseProgramSize(); - - // initialize vocab_size, bos_tok, eos_tok - vocab_size_ = _encoder.size() + _special_token_encoder.size(); - bos_tok_ = _special_token_encoder.at(_special_tokens->at(_bos_token_index)); - eos_tok_ = _special_token_encoder.at(_special_tokens->at(_eos_token_index)); - - initialized_ = true; - return Error::Ok; -} - -Result> Tiktoken::encode(const std::string &text, - int8_t bos, int8_t eos) const { - if (!initialized_) { - return Error::NotSupported; - } - auto res = _encode_with_special_token(text, _special_token_encoder).first; - for (auto i = 0; i < bos; ++i) { - res.insert(res.begin(), bos_tok_); - } - for (auto i = 0; i < eos; ++i) { - res.push_back(eos_tok_); - } - return Result>(std::move(res)); -} - -Result Tiktoken::decode(uint64_t prev, uint64_t cur) const { - (void)prev; - ET_CHECK_OK_OR_RETURN_ERROR(Tokenizer::decode_verify(cur)); - std::string ret; - - std::string token_bytes; - auto iter = _decoder.find(cur); - if (iter != _decoder.end()) { - token_bytes = iter->second; - } else { - iter = _special_token_decoder.find(cur); - if (iter != _special_token_decoder.end()) { - token_bytes = iter->second; - } else { - ET_CHECK_MSG(false, "unknown token: %" PRIu64, cur); - } - } - ret += token_bytes; - - return ret; -} -// -------------------------public method end------------------------------- - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h deleted file mode 100644 index 
5eed7e94c8..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tiktoken.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "tokenizer.h" -#include -#include -#include -#include - -namespace executorch { -namespace extension { -namespace llm { - -using Encoder = std::unordered_map; -using Decoder = std::unordered_map; -using Re2UPtr = std::unique_ptr; - -class Tiktoken : public Tokenizer { -public: - /** - * @param[in] special_tokens List of special tokens including bos, eos; - * @param[in] bos_token_index Index of the bos token in special_tokens; - * @param[in] eos_token_index Index of the eos token in special_tokens. - */ - explicit Tiktoken(std::unique_ptr> special_tokens, - size_t bos_token_index, size_t eos_token_index); - - ::executorch::runtime::Error load(const std::string &tokenizer_path) override; - - ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const override; - - ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const override; - -private: - template - std::pair, re2::StringPiece> - _split_with_allowed_special_token(re2::StringPiece &input, - const T &allowed_special) const; - - void _encode(re2::StringPiece &input, std::vector &ret, - uint64_t &last_piece_token_len) const; - - template - std::pair, uint64_t> - _encode_with_special_token(const std::string &text, - const T &allowed_special) const; - - Encoder _build_special_token_encoder(ssize_t num_base_tokens) const; - - std::unique_ptr> _special_tokens; - size_t _bos_token_index; - size_t _eos_token_index; - // Removed negative lookahead \s+(?!\S) since it's not supported by RE2. 
- const std::string _pattern = - R"((?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+)"; - Encoder _encoder; - Encoder _special_token_encoder; - Decoder _decoder; - Decoder _special_token_decoder; - - Re2UPtr _regex; - Re2UPtr _special_token_regex; -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::Decoder; -using ::executorch::extension::llm::Encoder; -using ::executorch::extension::llm::Re2UPtr; -using ::executorch::extension::llm::Tiktoken; -} // namespace executor -} // namespace torch diff --git a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h b/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h deleted file mode 100644 index 948cccc0d7..0000000000 --- a/packages/react-native-executorch/third-party/ios/ExecutorchLib/ExecutorchLib/tokenizer/tokenizer.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -// patternlint-disable-next-line executorch-cpp-nostdinc -#include -// patternlint-disable-next-line executorch-cpp-nostdinc -#include - -#include -#include - -namespace executorch { -namespace extension { -namespace llm { - -// A tokenizer interface. 
-class Tokenizer { -public: - explicit Tokenizer() {} - virtual ~Tokenizer() {} - - virtual ::executorch::runtime::Error - load(const std::string &tokenizer_path) = 0; - - virtual ::executorch::runtime::Result> - encode(const std::string &input, int8_t bos, int8_t eos) const = 0; - - ::executorch::runtime::Error decode_verify(uint64_t token) const { - if (!initialized_) { - ET_LOG(Error, "Tokenizer not initialized"); - return ::executorch::runtime::Error::NotSupported; - } - if (token >= vocab_size_) { - ET_LOG(Error, "token %" PRIu64 " is out side of vacab range %d", token, - vocab_size_); - return ::executorch::runtime::Error::NotSupported; - } - return ::executorch::runtime::Error::Ok; - } - - virtual ::executorch::runtime::Result - decode(uint64_t prev_token, uint64_t token) const = 0; - - // getters - int32_t vocab_size() const { return vocab_size_; } - - uint64_t bos_tok() const { return bos_tok_; } - - uint64_t eos_tok() const { return eos_tok_; } - -protected: - bool initialized_ = false; - int32_t vocab_size_ = 0; - uint64_t bos_tok_ = 0; - uint64_t eos_tok_ = 0; -}; - -} // namespace llm -} // namespace extension -} // namespace executorch - -namespace torch { -namespace executor { -// TODO(T197294990): Remove these deprecated aliases once all users have moved -// to the new `::executorch` namespaces. -using ::executorch::extension::llm::Tokenizer; -} // namespace executor -} // namespace torch